From 53fb986994102e5fff3d69a062423035759867e3 Mon Sep 17 00:00:00 2001 From: John Mertz Date: Tue, 20 Dec 2022 02:30:34 -0500 Subject: [PATCH] Comprehensive working version Code could be cleaned, but all functions seem to work --- lib/Mail/SpamAssassin/KeywordRuleGenerator.pm | 563 ++++++++++++------ t/04_rules.t | 104 ++-- t/05_write.t | 57 ++ t/T05/sa_plugins.pre | 1 + 4 files changed, 491 insertions(+), 234 deletions(-) create mode 100644 t/05_write.t create mode 100644 t/T05/sa_plugins.pre diff --git a/lib/Mail/SpamAssassin/KeywordRuleGenerator.pm b/lib/Mail/SpamAssassin/KeywordRuleGenerator.pm index 220ab9c..4144db2 100644 --- a/lib/Mail/SpamAssassin/KeywordRuleGenerator.pm +++ b/lib/Mail/SpamAssassin/KeywordRuleGenerator.pm @@ -72,7 +72,7 @@ and will output to the file: 70_id_example.cf -See the C methods for more information on this formatting. Also see +See the C methods for more information on this formatting. Also see the C method for discussion of the 'id'. Finally, a like file: @@ -134,10 +134,17 @@ our %EXPORT_TAGS = ( 'setPriority', 'getOutfile', 'setOutfile', + 'getScoresOutfile', + 'setScoresOutfile', 'getGlobalOutfile', 'setGlobalOutfile', - 'scorefile', - 'clearFiles', + 'getGlobalScoresOutfile', + 'setGlobalScoresOutfile', + 'getOutput', + 'getScoresOutput', + 'getGlobalOutput', + 'getGlobalScoresOutput', + 'clearFile', 'getFiles', 'nextFile', 'readFile', @@ -145,10 +152,12 @@ our %EXPORT_TAGS = ( 'setFile', 'createDir', 'cleanDir', - 'writeMetas', - 'writeWords', - 'writeGroups', - 'writeGlobals', + 'generateMetas', + 'generateScored', + 'generateGroups', + 'generateGlobals', + 'generateAll', + 'writeFile', 'writeAll' ] ); @@ -280,6 +289,7 @@ sub setId if (defined($id)) { $id = uc($id); + return "Must start with a letter" unless ($id =~ m/^[A-Z]/); $self->{'id'} = $id || return "Failed to set $id"; } else { return "No ID provided\n"; @@ -337,6 +347,7 @@ sub getFile if (defined($self->{'file'})) { return $self->{'file'}; } + return undef; } =head2 C<$kw->setFile()> @@ -352,8 +363,10 @@ sub setFile if (defined($file)) { $self->{'file'} = $file; + $self->setOutfile(); + $self->setScoresOutfile(); } else { - return "No File provided\n"; + delete($self->{'file'}); } } @@ -409,6 +422,65 @@ sub setOutfile } } +=head2 C<$kw->getScoresOutfile()> + +Getter for the output path for scores for the current C<$kw->{'file'}>. + +=cut + +sub getScoresOutfile +{ + my $self = shift; + my $file = shift || $self->{'file'}; + + $self->setScoresOutfile() unless (defined($self->{'filemap'}->{$file."_SCORES"})); + return $self->{'filemap'}->{$file."_SCORES"}; +} + +=head2 C<$kw->setScoresOutfile()> + +Setter for the output path for scores for the current C<$kw->{'outfile'}>. Can +be defined manually with a scalar argument, otherwise the path is constructed +from the existing attributes. + +=cut + +sub setScoresOutfile +{ + my $self = shift; + my $path = shift; + + if (defined($path)) { + $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $path || return "Failed to set $path"; + return undef; + } + if ($self->{'joinScores'}) { + if ($self->{'singleOutfile'}) { + $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'})); + $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'filemap'}->{'GLOBAL'}; + } else { + $self->setOutfile() unless (defined($self->{'filemap'}->{$self->{'file'}})); + $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'filemap'}->{$self->{'file'}}; + } + return undef; + } else { + if ($self->{'singleOutfile'}) { + $self->setGlobalScoresOutfile() unless (defined($self->{'filemap'}->{'GLOBAL_SCORES'})); + $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'filemap'}->{'GLOBAL_SCORES'}; + return undef; + } + } + my $file = $self->{'file'}; + $file =~ s/\//_/g; # Change dir slashes to _ + $file =~ s/(\.[^\.]*)$//g; # Remove extensions + $file = uc($file); # Convert to uppercase for rule names + $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'dir'}."/".$self->{'priority'} . + '_' . uc($self->{'id'}) . + '_' . $file . + '_SCORES' . + '.cf'; +} + =head2 C<$kw->getGlobalOutfile();> Return the full path of the output file used for GLOBAL rules. If it is not yet @@ -468,7 +540,7 @@ sub setGlobalOutfile $last = $file if (!defined($last) || $file gt $last); } my $file = $self->getDir().'/'.$self->getPriority()."_".$self->getId().".cf"; - if ($file gt $last) { + if (defined($last) && $file gt $last) { $self->{'filename'}->{'GLOBAL'} = $file; return undef; } @@ -484,32 +556,155 @@ sub setGlobalOutfile return ("Cannot determine a valid GLOBAL output file\n"); } -=head2 C<$kw->scorefile($file);> +=head2 C<$kw->getGlobalScoresOutfile();> -Return the file to output scores for the current C<$file> or the one provided. -A provided C<$file> will be checked against both the keys and values of the -filemap. +Return the full path of the output file used for GLOBAL rules. If it is not yet +defined, then C will be run first to try to set it. If this +fails, then nothing will be returned. -This will simply insert '_SCORES' just before the '.cf' extension. +=cut +sub getGlobalScoresOutfile +{ + my $self = shift; + return $self->{'filemap'}->{'GLOBAL_SCORES'} if (defined($self->{'filemap'}->{'GLOBAL_SCORES'})); + my $ret = $self->setGlobalScoresOutfile(); + return $self->{'filemap'}->{'GLOBAL_SCORES'} unless ($ret); +} + +=head2 C<$kw->setGlobalScoresOutfile($file);> + +Set the output file for the scores associated with global rules. This file must +be either the same or alphabetically after the global rules file. C<$file> can +be used to bypass that check, but it might lead to rules that do not work. + +Will try to select a filename as close to the existing output files as possible. +First it will simply copy the global rules file name if it is in C +mode. Then, it will try to simply append '_SCORES' to the base file name prior +to the extension. Next, if the priority is less than 99, it will simply +increment that. Finally, if will try to double the first '_'. If none of these +techniques work, it will return an error. =cut -sub scorefile +sub setGlobalScoresOutfile { my $self = shift; - my $file = shift || $self->getFile(); + my $path = shift; - my $scorefile = $self->getOutfile($file); - unless (defined($scorefile)) { - foreach (keys(%{$self->{'filemap'}})) { - if ($self->{'filemap'}->{$_} eq $file) { - $scorefile = $file; - last; - } + if (defined($path)) { + $self->{'filemap'}->{'GLOBAL_SCORES'} = $path; + return undef; + } + $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'})); + if ($self->{'joinScores'}) { + $self->{'filemap'}->{'GLOBAL_SCORES'} = $self->{'filemap'}->{'GLOBAL'}; + return undef; + } + if (defined($self->{'filemap'}->{'GLOBAL_SCORES'}) && $self->{'filemap'}->{'GLOBAL_SCORES'} gt $self->{'filemap'}->{'GLOBAL'}) { + return undef; + } + my $file = $self->{'filemap'}->{'GLOBAL'}; + $file =~ s/\.cf$/_SCORES.cf/; + if ($file gt $self->{'filemap'}->{'GLOBAL'}) { + $self->{'filemap'}->{'GLOBAL_SCORES'} = $file; + return undef; + } + $file = $self->getDir().'/'.$self->getPriority()."_".$self->getId()."_SCORES.cf"; + if ($file gt $self->{'filemap'}->{'GLOBAL'}) { + $self->{'filename'}->{'GLOBAL_SCORES'} = $file; + return undef; + } + my $gpriority = $self->{'filename'}->{'GLOBAL'}; + $gpriority =~ s/^(\d\d).*/$1/; + $file = $self->getDir().'/'.($gpriority+1)."_".$self->getId()."_SCORES.cf" if ($gpriority < 99); + if ($file gt $self->{'filemap'}->{'GLOBAL'}) { + $self->{'filename'}->{'GLOBAL_SCORES'} = $file; + return undef; + } + $file = $self->{'filename'}->{'GLOBAL'}; + $file =~ s/_/__/; + if ($file gt $self->{'filemap'}->{'GLOBAL'}) { + $self->{'filename'}->{'GLOBAL_SCORES'} = $file; + return undef; + } + return ("Cannot determine a valid GLOBAL output file\n"); +} + +=head2 C<$kw->getOutput()> + +Returns a reference to the output array buffer for the current C<$file>. + +=cut + +sub getOutput +{ + my $self = shift; + my $file = shift || $self->{'file'}; + + if ($self->{'singleOutfile'}) { + return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}}; + } elsif (defined($self->{'filemap'}->{$file})) { + return \@{$self->{'output'}->{$self->{'filemap'}->{$file}}}; + } +} + +=head2 C<$kw->getScoresOutput()> + +Returns a reference to the score output array buffer for the current C<$file>. + +=cut + +sub getScoresOutput +{ + my $self = shift; + my $file = shift || $self->{'file'}; + + $self->setScoreOutfile() unless (defined($self->{'filemap'}->{$file."_SCORES"})); + if ($self->{'joinScores'}) { + if ($self->{'singleOutfile'}) { + return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}}; + } elsif (defined($self->{'filemap'}->{$file})) { + return \@{$self->{'output'}->{$self->{'filemap'}->{$file}}}; + } + } else { + if ($self->{'singleOutfile'}) { + return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL_SCORES'}}}; + } elsif (defined($self->{'filemap'}->{$file})) { + return \@{$self->{'output'}->{$self->{'filemap'}->{$file."_SCORES"}}}; } } - $scorefile =~ s/\.cf$/_SCORES.cf/; - return $scorefile if ($scorefile =~ m/_SCORES\.cf$/); +} + +=head2 C<$kw->getGlobalOutput()> + +Returns a reference to the output array buffer for the GLOBAL file. + +=cut + +sub getGlobalOutput +{ + my $self = shift; + + $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'})); + return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}}; +} + +=head2 C<$kw->getGlobalScoresOutput()> + +Returns a reference to the output array buffer for the GLOBAL scores file. + +=cut + +sub getGlobalScoresOutput +{ + my $self = shift; + + $self->setGlobalScoresOutfile unless (defined($self->{'filemap'}->{'GLOBAL_SCORES'})); + if ($self->{'joinScores'}) { + return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}}; + } else { + return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL_SCORES'}}}; + } } =head2 C<$kw->getFiles($regex);> @@ -571,16 +766,17 @@ sub nextFile return 0; } -=head2 C<$kw->clearFiles();> +=head2 C<$kw->clearFile();> Clear the current file reference queue. =cut -sub clearFiles +sub clearFile { my $self = shift; - delete($self->{'rules'}) || return "Failed to delete rules hash\n"; + my $file = shift || $self->{'file'}; + delete($self->{'rules'}->{$file}) || return "Failed to delete rules hash\n"; return 0; } @@ -757,48 +953,6 @@ sub readLine return (); } -=head2 C<$kw->processMetas($outfile, $file);> - -Create all of the component meta rules for the declared C<$file>. Those that -will be used for the standalone and count rules. Output to C<$outfile>. This -must be run before the other process methods and must be run for 'GLOBAL' first, -otherwise output will be invalid. Meta rules for file-specific words will not be -generated if they are also in the 'GLOBAL' group, instead the meta rules from -the 'GLOBAL' file will be used for the count rules in all other files. This -will prevent duplicates, but also requires that you not rename output files such -that the they appear before the 'GLOBAL' file (without '_C<$file>' at the end). - -=cut - -sub processMetas -{ - my $self = shift; - my $file = shift; - my $rules = shift; - - my $prefix = $self->{'id'}; - my @words; - if ($file eq 'GLOBAL') { - @words = @{$rules->{'GLOBAL'}}; - } else { - $prefix .= "_".$file; - foreach (keys(%{$rules->{$file}->{'groups'}})) { - next if (grep {/^$_$/} @{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}}); - } - foreach (keys(%{$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'words'}})) { - next if (grep {/^$_$/} @{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}}); - next if (grep {/^$_$/} @words); - push (@words, $_); - } - } - foreach my $word (@words) { - $self->{'output'}->{$file} .= - "body\t__".$prefix."_".uc($word)."_BODY\t/\\b".$word."\\b/\n" . - "header\t__".$prefix."_".uc($word)."_SUBJ\tSubject =~ /\\b".$word."\\b/\n" . - "meta\t__".$prefix."_".uc($word)."\t( __".$prefix."_".uc($word)."_BODY || __".$prefix."_".uc($word)."_SUBJ )\n\n"; - } - -} =head2 C<$kw->getPrefix($file);> @@ -809,13 +963,13 @@ Return a standardized rule prefix using C and C. sub getPrefix { my $self = shift; + my $file = shift || $self->getFile(); my $id = $self->getId(); - my $file = $self->getFile(); - $file =~ s/(?:.*\/)*(.*)\.cf/$1/; - return uc($id."_".$file); + $file =~ s/(?:.*\/)*(.*)\.cf/$1/ if (defined($file)); + return uc($id.(defined($file) ? "_$file" : '')); } -=head2 C<$kw->writeMetas($file);> +=head2 C<$kw->generateMetas($file);> Write component rules for file in the current file, or one set by C<$file> to make them available to all other rule types. @@ -829,38 +983,41 @@ subject header, then a 'meta' rule to connect them. =cut -sub writeMetas +sub generateMetas { my $self = shift; - my $file = shift || $self->getFile(); + my $words = shift; + my $file = $self->getFile(); + my $prefix = $self->getPrefix(); + my $output = $self->getOutput(); + my $scoresoutput = $self->getScoresOutput(); + + if ($self->{'debug'}) { + print STDERR "Writing Metas for $file\n"; + } if ($self->{'singleOutfile'}) { - print( -"############".('#'*length($self->{'rules'}->{$file}->{'PREFIX'}))."#\n". -"# Metas for ".$self->{'rules'}->{$file}->{'PREFIX'}."\n". -"############".('#'*length($self->{'rules'}->{$file}->{'PREFIX'}))."#\n\n" + push(@{$output}, + "############".('#'*length($prefix))."#\n". + "# Metas for $prefix\n". + "############".('#'*length($prefix))."#\n\n" ); } - print("# ".$self->{'rules'}->{$file}->{'COMMENTS'}->{$_}."\n"); - my %uniq = %{$self->{'rules'}->{$file}} || (); - foreach my $group ( keys(%{$self->{'rules'}->{$file}}) ) { - next if ($group eq 'COMMENT' || $group eq 'SCORED'); - foreach ( @{$self->{'rules'}->{$file}->{'SCORED'}} ) { - $uniq{$_} = 0 unless (defined($uniq{$_})); - } - } - my $output; - foreach ( sort(keys(%uniq)) ) { - if (defined($self->{'rules'}->{$file}->{'COMMENTS'}->{$_})) { - print("# ".$self->{'rules'}->{$file}->{'COMMENTS'}->{$_}."\n"); - } - print("body __".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_).'_BODY /\b'.$_.'\b/'."\n"); - print("header __".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_).'_SUBJ Subject =~ /\b'.$_.'\b/'."\n"); - print("meta __".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_)." ( __".$self->{'rules'}->{$file}->{'PREFIX'}."_BODY || __".$self->{'rules'}->{$file}->{'PREFIX'}."_SUBJ )\n\n"); + foreach my $word (keys(%{$words})) { + push(@{$output}, "# ".$words->{$word}."\n") if ($words->{$word}); + push(@{$output}, + "body __${prefix}_".uc($word). + "_BODY /\\b${word}\\b/\n", + "header __${prefix}_".uc($word). + "_SUBJ Subject =~ /\\b${word}\\b/\n", + "meta __${prefix}_".uc($word). + " ( __${prefix}_".uc($word)."_BODY || __${prefix}_". + uc($word)."_SUBJ )\n\n" + ); } } -=head2 C<$kw->writeWords($file);> +=head2 C<$kw->generateScored($file);> Write 'SCORED' word rules for file in the current file, or one set by C<$file>. @@ -872,88 +1029,96 @@ rule with a '__' prefix). =cut -sub writeWords +sub generateScored { my $self = shift; my $file = shift || $self->getFile(); + my $prefix = $self->getPrefix(); + my $output = $self->getOutput(); + my $scoreoutput = $self->getScoresOutput(); if ($self->{'singleOutfile'}) { - print( -"############".('#'*length($self->{'rules'}->{$file}->{'PREFIX'}))."#\n". -"# Words for ".$self->{'rules'}->{$file}->{'PREFIX'}."\n". -"############".('#'*length($self->{'rules'}->{$file}->{'PREFIX'}))."#\n\n" + push(@{$output}, + "############".('#'*length($prefix))."#\n". + "# Scored words for $prefix\n". + "############".('#'*length($prefix))."#\n\n" ); } - foreach ( keys(%{$self->{'rules'}->{$file}->{'SCORED'}->{$_}}) ) { - print("meta ".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_). - " ( __".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_)." )\n" + foreach ( keys(%{$self->{'rules'}->{$file}->{'SCORED'}}) ) { + push(@{$output}, "meta ${prefix}_".uc($_). + " ( __${prefix}_".uc($_)." )\n" ); if (defined($self->{'rules'}->{$file}->{'COMMENTS'}->{$_})) { - print("describe ".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_)." ". - $self->{'rules'}->{$file}->{'COMMENTS'}->{$_}."\n" + push(@{$scoreoutput}, "describe ${prefix}_".uc($_)." ". + $self->{'rules'}->{$file}->{'COMMENTS'}->{$_}. + "\n" ); } - print("score ".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_)." ". + push(@{$scoreoutput}, + "score ${prefix}_".uc($_)." ". $self->{'rules'}->{$file}->{'SCORED'}->{$_}."\n\n" ); } } -=head2 C<$kw->writeGroups($file);> +=head2 C<$kw->generateGroups($file);> Write group rules for file in the current file, or one set by C<$file>. These are a list of 'meta' rules where the component rules are all of the 'meta's in the group for each of the available match counts. - meta KW_FILE_GROUP_1 ( ( __KW_WORD1 + __KW_WORD2 ) >= 1 ) + meta KW_FILE_GROUP_1 ( __KW_WORD1 + __KW_WORD2 ) >= 1 describe KW_FILE_GROUP_1 Found 1 word(s) from GROUP score KW_FILE_GROUP_1 0.01 - meta KW_FILE_GROUP_2 ( ( __KW_WORD1 + __KW_WORD2 ) >= 2 ) + meta KW_FILE_GROUP_2 ( __KW_WORD1 + __KW_WORD2 ) >= 2 describe KW_FILE_GROUP_2 Found 2 word(s) from GROUP score KW_FILE_GROUP_2 0.01 =cut -sub writeGroups +sub generateGroups { my $self = shift; my $file = shift || $self->getFile(); + my $prefix = $self->getPrefix(); - my $prefix = $self->{'rules'}->{$file}->{'PREFIX'}; + my $output = $self->getOutput(); + my $scoreoutput = $self->getScoresOutput(); if ($self->{'singleOutfile'}) { - print( -"#############".('#'*length($prefix))."#\n". -"# Groups for ".$prefix."\n". -"#############".('#'*length($prefix))."#\n\n" + push(@{$output}, + "#############".('#'*length($prefix))."#\n". + "# Groups for $prefix\n". + "#############".('#'*length($prefix))."#\n\n" ); } foreach my $group ( keys(%{$self->{'rules'}->{$file}}) ) { - next if ( $group eq 'COMMENT' || $group eq 'SCORED' ); - print("# $group\n"); + next if ( $group eq 'COMMENTS' || $group eq 'SCORED' ); + push(@{$output}, "# $group\n"); my $gprefix = $prefix; unless ($group eq 'LOCAL') { - $gprefix .= $group; + $gprefix .= '_'.$group; } - my $start = "meta ".$gprefix."_"; - my $words = " ( ( "; - foreach my $word ( keys(%{$self->{'rules'}->{$file}->{$group}}) ) { - $words .= "__".$prefix."_".$word." + "; + my $start = "meta ${gprefix}_"; + my $words = " ( "; + foreach my $word ( @{$self->{'rules'}->{$file}->{$group}} ) { + $words .= "__${prefix}_".uc($word)." + "; } $words =~ s/\+ $/\) >= /; for (my $i = 1; $i <= scalar(@{$self->{'rules'}->{$file}->{$group}}); $i++) { - my $line = $start.$i.$words.$i." )\n"; - print($line."\n". - "describe ".$gprefix."_".$i." Found $i GLOBAL word(s) from ".$prefix."\n" + push(@{$output}, $start.$i.$words.$i."\n\n"); + push(@{$scoreoutput}, + "describe ${gprefix}_$i Found $i $group word" + . ($i>1 ? 's' : '')." from $prefix\n", + "score ${gprefix}_$i 0.01\n\n" ); - print("score ".$gprefix."_".$i. " 0.01\n\n"); } } } -=head2 C<$kw->writeGlobals();> +=head2 C<$kw->generateGlobals();> -Write 'GLOBAL' group rules. Similar to C except that component +Write 'GLOBAL' group rules. Similar to C except that component rules must be included from external files. meta KW_1 ( ( __KW_FILE1_WORD + __KW_FILE2_WORD ) >= 1 ) @@ -966,38 +1131,41 @@ rules must be included from external files. =cut -sub writeGlobals +sub generateGlobals { my $self = shift; - my $outfile = $self->getGlobalOutfile(); - print( -"##########\n". -"# Globals\n". -"##########\n\n" - ); - my $prefix = $self->getId(); - my $start = "meta ".$prefix."_"; - my $words = " ( ( "; + $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'})); + $self->setGlobalScoresOutfile() unless (defined($self->{'filemap'}->{'GLOBAL_SCORES'})); + my $output = $self->getGlobalOutput(); + my $scoreoutput = $self->getGlobalScoresOutput(); + if ($self->{'singleOutfile'}) { + push(@{$output}, + "##########\n". + "# Globals\n". + "##########\n\n" + ); + } + $self->setFile(undef); + my $gprefix = $self->getPrefix(); + my $start = "meta ${gprefix}_"; + my $words = " ( "; + my $prefix; foreach my $word ( keys(%{$self->{'rules'}->{'GLOBAL'}}) ) { - $words .= "__".$self->{'rules'}->{'GLOBAL'}->{$word}."_".$word." + "; + $prefix = $self->getPrefix($self->{'rules'}->{'GLOBAL'}->{$word}); + $words .= "__${prefix}_" . uc($word) . " + "; } $words =~ s/\+ $/\) >= /; - for (my $i = 1; $i <= scalar(@{$self->{'rules'}->{'GLOBAL'}}); $i++) { - my $line = $start.$i.$words.$i." )\n"; - print($line."\n". - "describe ".$prefix."_".$i." Found $i GLOBAL word(s) from ".$prefix."\n" + for (my $i = 1; $i <= scalar(keys(%{$self->{'rules'}->{'GLOBAL'}})); $i++) { + my $line = $start.$i.$words.$i."\n"; + push(@{$output}, $line.($self->{'joinScores'} ? '' : "\n")); + push(@{$scoreoutput}, "describe ${gprefix}_${i} Found $i ". + "GLOBAL word(s) from ${gprefix}\n", + "score ${gprefix}_${i} 0.01\n\n" ); - print("score ".$prefix."_".$i. " 0.01\n\n"); } } -sub writeScores -{ - my $self = shift; - my $path = shift; -} - sub getDir { my $self = shift; @@ -1009,7 +1177,9 @@ sub setDir { my $self = shift; my $dir = shift || ("$ENV{'PWD'}/$self->{'id'}"); - $self->{'dir'} = $dir; + $dir =~ m/([^\0]+)/; + $self->{'dir'} = $1; + return undef; } sub createDir @@ -1025,19 +1195,25 @@ sub createDir sub cleanDir { my $self = shift; - my @files = @_ || (keys(%{$self->{'filemap'}})); + my @files = @_; + unless (scalar(@files)) { + my %uniq; + foreach (keys(%{$self->{'filemap'}})) { + $uniq{$_} = 1; + } + @files = keys(%uniq); + } foreach my $file (@files) { if (-e $self->{'filemap'}->{$file}) { if ($self->{'debug'}) { print STDERR "Removing old file ".$self->{'filemap'}->{$file}."\n"; - print "Removing old file ".$self->{'filemap'}->{$file}."\n"; - #unlink($self->{'dir'}.'/'.$self->{'filemap'}->{'file'}) || die "Output file '".$self->{'filemap'}->{$file}."' already exists and could not be deleted\n"; } + unlink($self->{'filemap'}->{$file}) || die "Output file '".$self->{'filemap'}->{$file}."' exists and could not be deleted\n"; } } } -=head2 C<$kw->writeAll($dir)> +=head2 C<$kw->generateAll($dir)> Output files will use the name of each input file, stripping any extension, and forcing the name to uppercase. Rules in each file will be called: @@ -1060,7 +1236,7 @@ body for that word. =cut -sub writeAll +sub generateAll { my $self = shift; my @written; @@ -1070,19 +1246,62 @@ sub writeAll foreach my $file (keys(%{$self->{'rules'}})) { # Reserve GLOBAL for last - if ($file eq 'GLOBAL') { - next; - } + next if ($file eq 'GLOBAL'); $self->setFile($file); - $self->writeMetas(); - $self->writeWords(); - $self->writeGroups(); - $self->writeScores(); + my %all; + foreach my $group (keys(%{$self->{'rules'}->{$file}})) { + if ($group eq 'SCORED' || $group eq 'COMMENTS') { + next; + } else { + foreach my $word (@{$self->{'rules'}->{$file}->{$group}}) { + $all{$word} = $self->{'rules'}->{$file}->{'COMMENTS'}->{$word} || ''; + } + } + } + $self->generateMetas(\%all); + $self->generateGroups(); + $self->generateScored() unless ($self->{joinScores}); } - $self->writeGlobals(); + $self->generateGlobals(); return 0; } +=head2 C<$kw->writeFile($file)> + +=cut + +sub writeFile +{ + my $self = shift; + my $file = shift || return "Requires filename"; + return "Nothing generated for $file" unless (defined($self->{'output'}->{$file})); + + if (open(my $fh, ">", $file)) { + foreach my $line (@{$self->{'output'}->{$file}}) { + print $fh $line; + } + close($fh); + } else { + return "Could not open $file for writing"; + } +} + +=head2 C<$kw->writeAll()> + +=cut + +sub writeAll +{ + my $self = shift; + $self->generateAll() unless (defined($self->{'output'})); + my @errors; + foreach my $file (keys(%{$self->{'output'}})) { + my $ret = $self->writeFile($file); + push(@errors, $ret) if (defined($ret)); + } + return ("Errors:\n ".join("\n ", @errors)) if (scalar(@errors)); +} + =head1 MORE For discussion of the module and examples, see: @@ -1120,15 +1339,3 @@ Ehttps://git.john.me.tz/jpm/Mail-SpamAssassin-KeywordRuleGeneratorE =cut 1; - -=pod -die "Please provide rules file(s) as an argument\n" unless (defined($ARGV[0])); - -my @files; -my $files_ref = \@files; -getFiles($files_ref, @ARGV); - -my %keywords; -my $key_ref = \%keywords; -readFiles($key_ref, @files); -=cut diff --git a/t/04_rules.t b/t/04_rules.t index 2680872..8035ff2 100644 --- a/t/04_rules.t +++ b/t/04_rules.t @@ -20,26 +20,18 @@ my ( $missing, $extra, $incorrect ) = 0; use Data::Dump; foreach my $file (keys(%{$expected})) { if ($file eq 'GLOBAL') { - my ($m, $e) = compareGroups( + my ($m, $e) = compareValues( $expected->{$file}, $kw->{'rules'}->{$file} ); - print("Missing $m words\n") if ($m); - print("$e extra words\n") if ($e); + print("Missing $m words in GLOBAL\n") if ($m); + print("$e extra words in GLOBAL\n") if ($e); $missing += $m; $extra += $e; } else { foreach my $group (keys(%{$expected->{$file}})) { - if ($group eq 'SCORED') { - my ($m, $e, $i) = compareScores( - $expected->{$file}->{$group}, - $kw->{'rules'}->{$file}->{$group} - ); - $missing += $m; - $extra += $e; - $incorrect += $i; - } elsif ($group eq 'COMMENTS') { - my ($m, $e, $i) = compareScores( + if ($group eq 'SCORED' || $group eq 'COMMENTS') { + my ($m, $e, $i) = compareValues( $expected->{$file}->{$group}, $kw->{'rules'}->{$file}->{$group} ); @@ -47,7 +39,7 @@ foreach my $file (keys(%{$expected})) { $extra += $e; $incorrect += $i; } else { - my ($m, $e) = compareGroups( + my ($m, $e) = compareLists( $expected->{$file}->{$group}, $kw->{'rules'}->{$file}->{$group} ); @@ -64,7 +56,7 @@ ok ($incorrect == 0, "No incorrect scores are found"); done_testing(); -sub compareGroups +sub compareLists { my $expect = shift; my $loaded = shift; @@ -75,7 +67,7 @@ sub compareGroups while (scalar(@e)) { unless (scalar(@l)) { - print("extra words @l\n"); + print("extra words @e\n"); $extra += scalar(@e); last; } @@ -106,7 +98,7 @@ sub compareGroups return ($missing, $extra); } -sub compareScores +sub compareValues { my $expect = shift; my $loaded = shift; @@ -114,11 +106,11 @@ sub compareScores my %remaining = %{$loaded}; my ($missing, $extra, $incorrect) = (0, 0, 0); foreach my $word (keys(%$expect)) { - if (!defined($loaded->{$word})) { - print("Missing score assignment for $word\n"); + if (!defined($loaded->{$word}) && defined($expect->{$word}) && $expect->{$word} != 0) { + print("Missing value assignment for $word\n"); $missing++; } elsif ($expect->{$word} != $loaded->{$word}) { - print("Incorrect score assignment for $word\n"); + print("Incorrect value assignment for $word\n"); $incorrect++; } else { delete($remaining{$word}); @@ -133,39 +125,39 @@ sub compareScores sub getExpected { my %expected = ( - 'GLOBAL' => [ - 'lorem', - 'ipsum', - 'dolor', - 'sit', - 'amet', - 'consectetur', - 'adipiscing', - 'elit', - 'sed', - 'do', - 'eiusmod', - 'tempor', - 'minim', - 'veniam', - 'quis', - 'nostrud', - 'exercitation', - 'ullamco', - 'laboris', - 'nisi', - 'ut', - 'aliquip', - 'ex', - 'ea', - 'commodo', - 'consequat', - 'duis', - 'dolore', - 'eu', - 'fugiat', - ], - '50_04_T_04_RULES0.cf' => { + 'GLOBAL' => { + 'lorem' => 't/04_rules0.cf', + 'ipsum' => 't/04_rules0.cf', + 'dolor' => 't/04_rules0.cf', + 'sit' => 't/04_rules0.cf', + 'amet' => 't/04_rules0.cf', + 'consectetur' => 't/04_rules0.cf', + 'adipiscing' => 't/04_rules0.cf', + 'elit' => 't/04_rules0.cf', + 'sed' => 't/04_rules0.cf', + 'do' => 't/04_rules0.cf', + 'eiusmod' => 't/04_rules0.cf', + 'tempor' => 't/04_rules0.cf', + 'minim' => 't/04_rules0.cf', + 'veniam' => 't/04_rules0.cf', + 'quis' => 't/04_rules0.cf', + 'nostrud' => 't/04_rules1.cf', + 'exercitation' => 't/04_rules1.cf', + 'ullamco' => 't/04_rules1.cf', + 'laboris' => 't/04_rules1.cf', + 'nisi' => 't/04_rules1.cf', + 'ut' => 't/04_rules1.cf', + 'aliquip' => 't/04_rules1.cf', + 'ex' => 't/04_rules1.cf', + 'ea' => 't/04_rules1.cf', + 'commodo' => 't/04_rules1.cf', + 'consequat' => 't/04_rules1.cf', + 'duis' => 't/04_rules1.cf', + 'dolore' => 't/04_rules1.cf', + 'eu' => 't/04_rules1.cf', + 'fugiat' => 't/04_rules1.cf', + }, + 't/04_rules0.cf' => { 'SCORED' => { 'lorem' => 1, 'sit' => 1, @@ -219,7 +211,7 @@ sub getExpected 'veniam', 'quis', ], - 'group' => [ + 'GROUP' => [ 'lorem', 'ipsum', 'dolor', @@ -234,7 +226,7 @@ sub getExpected 'ad', ] }, - '50_04_T_04_RULES1.cf' => { + 't/04_rules1.cf' => { 'SCORED' => { 'nostrud' => 1, 'laboris' => 1, @@ -262,7 +254,7 @@ sub getExpected 'eu', 'fugiat', ], - 'group' => [ + 'GROUP' => [ 'nostrud', 'exercitation', 'ullamco', diff --git a/t/05_write.t b/t/05_write.t new file mode 100644 index 0000000..094e463 --- /dev/null +++ b/t/05_write.t @@ -0,0 +1,57 @@ +#!/usr/bin/perl + +# Basic test to ensure files can be found and read. Verification will come later + +use Test::More; + +use lib 'lib/'; +use Mail::SpamAssassin::KeywordRuleGenerator; + +my $id = '05'; + +my $kw = Mail::SpamAssassin::KeywordRuleGenerator->new( { 'id' => T.$id, 'debug' => 0, 'joinScores' => 0 } ); +my $testdir = $ENV{'PWD'}.'/t/'.T.$id; +ok (!$kw->createDir($testdir), "Created output directory: '$ENV{'PWD'}/t/$id'"); +$kw->setDir($testdir); +ok ($kw->getDir() eq $testdir, "Set output directory: '$ENV{'PWD'}/t/$id'"); + +my @files = ( 't/04_rules0.cf', 't/04_rules1.cf' ); +my @failed = @{$kw->readAll( @files )}; +ok(!scalar(@failed), "Load 'rules' hash with readAll"); + +$kw->writeAll(); + +my @files = glob($testdir.'/*.cf'); +ok (scalar(@files) == 6, "Correct number of files generated"); +my %expected = ( + '50_T05_T_04_RULES0.cf' => 169, + '50_T05_T_04_RULES0_SCORES.cf' => 105, + '50_T05_T_04_RULES1.cf' => 148, + '50_T05_T_04_RULES1_SCORES.cf' => 97, + '51_T05.cf' => 60, + '51_T05_SCORES.cf' => 90 +); +my %remaining = %expected; +foreach (@files) { + $e = $_; + $e =~ s/^(.*\/)?t\/T05\///; + if (open(my $fh, "<", $_)) { + while (<$fh>) { + $expected{$e}--; + } + close($fh); + } + ok ($expected{$e} == 0, "Correct number of lines found in $e"); + delete($remaining{$e}); +} +ok (!scalar(keys(%remaining)), "All expected output files found"); + +use Mail::SpamAssassin; +my $sa = Mail::SpamAssassin->new( { 'site_rules_filename' => $testdir } ); +$failed = $sa->lint_rules(); +ok (!$failed, "Verified by spamassassin".($res ? "\n$failed" : "")); + +$kw->cleanDir(); + +done_testing(); + diff --git a/t/T05/sa_plugins.pre b/t/T05/sa_plugins.pre new file mode 100644 index 0000000..61172f7 --- /dev/null +++ b/t/T05/sa_plugins.pre @@ -0,0 +1 @@ +loadplugin Mail::SpamAssassin::Plugin::Check