Comprehensive working version

Code could be cleaned, but all functions seem to work
2022-12-20 02:30:34 -05:00 · 2022-12-20 02:30:34 -05:00 · 53fb986994
parent acc4250cd8
commit 53fb986994
4 changed files with 491 additions and 234 deletions
--- a/lib/Mail/SpamAssassin/KeywordRuleGenerator.pm
+++ b/lib/Mail/SpamAssassin/KeywordRuleGenerator.pm
@ -72,7 +72,7 @@ and will output to the file:
    70_id_example.cf
-See the C<write*> methods for more information on this formatting. Also see
+See the C<generate*> methods for more information on this formatting. Also see
 the C<new> method for discussion of the 'id'.
 Finally, a like file:
@ -134,10 +134,17 @@ our %EXPORT_TAGS = (
                'setPriority',
                'getOutfile',
                'setOutfile',
                'getScoresOutfile',
                'setScoresOutfile',
                'getGlobalOutfile',
                'setGlobalOutfile',
-                'scorefile',
+                'getGlobalScoresOutfile',
-                'clearFiles',
+                'setGlobalScoresOutfile',
                'getOutput',
                'getScoresOutput',
                'getGlobalOutput',
                'getGlobalScoresOutput',
                'clearFile',
                'getFiles',
                'nextFile',
                'readFile',
@ -145,10 +152,12 @@ our %EXPORT_TAGS = (
                'setFile',
                'createDir',
                'cleanDir',
-                'writeMetas',
+                'generateMetas',
-                'writeWords',
+                'generateScored',
-                'writeGroups',
+                'generateGroups',
-                'writeGlobals',
+                'generateGlobals',
                'generateAll',
                'writeFile',
                'writeAll'
        ]
 );
@ -280,6 +289,7 @@ sub setId
        if (defined($id)) {
                $id = uc($id);
                return "Must start with a letter" unless ($id =~ m/^[A-Z]/);
                $self->{'id'} = $id || return "Failed to set $id";
        } else {
                return "No ID provided\n";
@ -337,6 +347,7 @@ sub getFile
        if (defined($self->{'file'})) {
                return $self->{'file'};
        }
        return undef;
 }
 =head2 C<$kw->setFile()>
@ -352,8 +363,10 @@ sub setFile
        if (defined($file)) {
                $self->{'file'} = $file;
                $self->setOutfile();
                $self->setScoresOutfile();
        } else {
-                return "No File provided\n";
+                delete($self->{'file'});
        }
 }
@ -409,6 +422,65 @@ sub setOutfile
        }
 }
 =head2 C<$kw->getScoresOutfile()>
 Getter for the output path for scores for the current C<$kw->{'file'}>.
 =cut
 sub getScoresOutfile
 {
        my $self = shift;
        my $file = shift || $self->{'file'};
        $self->setScoresOutfile() unless (defined($self->{'filemap'}->{$file."_SCORES"}));
        return $self->{'filemap'}->{$file."_SCORES"};
 }
 =head2 C<$kw->setScoresOutfile()>
 Setter for the output path for scores for the current C<$kw->{'outfile'}>. Can
 be defined manually with a scalar argument, otherwise the path is constructed
 from the existing attributes.
 =cut
 sub setScoresOutfile
 {
        my $self = shift;
        my $path = shift;
        if (defined($path)) {
                $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $path || return "Failed to set $path";
                return undef;
        }
        if ($self->{'joinScores'}) {
                if ($self->{'singleOutfile'}) {
                        $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'}));
                        $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'filemap'}->{'GLOBAL'};
                } else {
                        $self->setOutfile() unless (defined($self->{'filemap'}->{$self->{'file'}}));
                        $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'filemap'}->{$self->{'file'}};
                }
                return undef;
        } else {
                if ($self->{'singleOutfile'}) {
                        $self->setGlobalScoresOutfile() unless (defined($self->{'filemap'}->{'GLOBAL_SCORES'}));
                        $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'filemap'}->{'GLOBAL_SCORES'};
                        return undef;
                }
        }
        my $file = $self->{'file'};
        $file =~ s/\//_/g; # Change dir slashes to _
        $file =~ s/(\.[^\.]*)$//g; # Remove extensions
        $file = uc($file); # Convert to uppercase for rule names
        $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'dir'}."/".$self->{'priority'} .
                '_' . uc($self->{'id'}) .
                '_' . $file . 
                '_SCORES' .
                '.cf';
 }
 =head2 C<$kw->getGlobalOutfile();>
 Return the full path of the output file used for GLOBAL rules. If it is not yet
@ -468,7 +540,7 @@ sub setGlobalOutfile
                $last = $file if (!defined($last) || $file gt $last);
        }
        my $file = $self->getDir().'/'.$self->getPriority()."_".$self->getId().".cf";
-        if ($file gt $last) {
+        if (defined($last) && $file gt $last) {
                $self->{'filename'}->{'GLOBAL'} = $file;
                return undef;
        }
@ -484,32 +556,155 @@ sub setGlobalOutfile
        return ("Cannot determine a valid GLOBAL output file\n");
 }
-=head2 C<$kw->scorefile($file);>
+=head2 C<$kw->getGlobalScoresOutfile();>
-Return the file to output scores for the current C<$file> or the one provided.
+Return the full path of the output file used for GLOBAL rules. If it is not yet
-A provided C<$file> will be checked against both the keys and values of the
+defined, then C<setGlobalOutfile()> will be run first to try to set it. If this
-filemap.
+fails, then nothing will be returned.
-This will simply insert '_SCORES' just before the '.cf' extension.
+=cut
 sub getGlobalScoresOutfile
 {
        my $self = shift;
        return $self->{'filemap'}->{'GLOBAL_SCORES'} if (defined($self->{'filemap'}->{'GLOBAL_SCORES'}));
        my $ret = $self->setGlobalScoresOutfile();
        return $self->{'filemap'}->{'GLOBAL_SCORES'} unless ($ret);
 }
 =head2 C<$kw->setGlobalScoresOutfile($file);>
 Set the output file for the scores associated with global rules. This file must
 be either the same or alphabetically after the global rules file. C<$file> can
 be used to bypass that check, but it might lead to rules that do not work.
 Will try to select a filename as close to the existing output files as possible.
 First it will simply copy the global rules file name if it is in C<joinScores>
 mode. Then, it will try to simply append '_SCORES' to the base file name prior
 to the extension. Next, if the priority is less than 99, it will simply
 increment that. Finally, if will try to double the first '_'. If none of these
 techniques work, it will return an error.
 =cut
-sub scorefile
+sub setGlobalScoresOutfile
 {
        my $self = shift;
-        my $file = shift || $self->getFile();
+        my $path = shift;
-        my $scorefile = $self->getOutfile($file);
+        if (defined($path)) {
-        unless (defined($scorefile)) {
+                $self->{'filemap'}->{'GLOBAL_SCORES'} = $path;
-                foreach (keys(%{$self->{'filemap'}})) {
+                return undef;
-                        if ($self->{'filemap'}->{$_} eq $file) {
+        }
-                                $scorefile = $file;
+        $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'}));
-                                last;
+        if ($self->{'joinScores'}) {
-                        }
+                $self->{'filemap'}->{'GLOBAL_SCORES'} = $self->{'filemap'}->{'GLOBAL'};
                return undef;
        }
        if (defined($self->{'filemap'}->{'GLOBAL_SCORES'}) && $self->{'filemap'}->{'GLOBAL_SCORES'} gt $self->{'filemap'}->{'GLOBAL'}) {
                return undef;
        }
        my $file = $self->{'filemap'}->{'GLOBAL'};
        $file =~ s/\.cf$/_SCORES.cf/;
        if ($file gt $self->{'filemap'}->{'GLOBAL'}) {
                $self->{'filemap'}->{'GLOBAL_SCORES'} = $file;
                return undef;
        }
        $file = $self->getDir().'/'.$self->getPriority()."_".$self->getId()."_SCORES.cf";
        if ($file gt $self->{'filemap'}->{'GLOBAL'}) {
                $self->{'filename'}->{'GLOBAL_SCORES'} = $file;
                return undef;
        }
        my $gpriority = $self->{'filename'}->{'GLOBAL'};
        $gpriority =~ s/^(\d\d).*/$1/;
        $file = $self->getDir().'/'.($gpriority+1)."_".$self->getId()."_SCORES.cf" if ($gpriority < 99);
        if ($file gt $self->{'filemap'}->{'GLOBAL'}) {
                $self->{'filename'}->{'GLOBAL_SCORES'} = $file;
                return undef;
        }
        $file = $self->{'filename'}->{'GLOBAL'};
        $file =~ s/_/__/;
        if ($file gt $self->{'filemap'}->{'GLOBAL'}) {
                $self->{'filename'}->{'GLOBAL_SCORES'} = $file;
                return undef;
        }
        return ("Cannot determine a valid GLOBAL output file\n");
 }
 =head2 C<$kw->getOutput()>
 Returns a reference to the output array buffer for the current C<$file>.
 =cut
 sub getOutput
 {
        my $self = shift;
        my $file = shift || $self->{'file'};
        if ($self->{'singleOutfile'}) {
                return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}};
        } elsif (defined($self->{'filemap'}->{$file})) {
                return \@{$self->{'output'}->{$self->{'filemap'}->{$file}}};
        }
 }
 =head2 C<$kw->getScoresOutput()>
 Returns a reference to the score output array buffer for the current C<$file>.
 =cut
 sub getScoresOutput
 {
        my $self = shift;
        my $file = shift || $self->{'file'};
        $self->setScoreOutfile() unless (defined($self->{'filemap'}->{$file."_SCORES"}));
        if ($self->{'joinScores'}) {
                if ($self->{'singleOutfile'}) {
                        return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}};
                } elsif (defined($self->{'filemap'}->{$file})) {
                        return \@{$self->{'output'}->{$self->{'filemap'}->{$file}}};
                }
        } else {
                if ($self->{'singleOutfile'}) {
                        return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL_SCORES'}}};
                } elsif (defined($self->{'filemap'}->{$file})) {
                        return \@{$self->{'output'}->{$self->{'filemap'}->{$file."_SCORES"}}};
                }
        }
-        $scorefile =~ s/\.cf$/_SCORES.cf/;
+}
-        return $scorefile if ($scorefile =~ m/_SCORES\.cf$/);
+
 =head2 C<$kw->getGlobalOutput()>
 Returns a reference to the output array buffer for the GLOBAL file.
 =cut
 sub getGlobalOutput
 {
        my $self = shift;
        $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'}));
        return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}};
 }
 =head2 C<$kw->getGlobalScoresOutput()>
 Returns a reference to the output array buffer for the GLOBAL scores file.
 =cut
 sub getGlobalScoresOutput
 {
        my $self = shift;
        $self->setGlobalScoresOutfile unless (defined($self->{'filemap'}->{'GLOBAL_SCORES'}));
        if ($self->{'joinScores'}) {
                return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}};
        } else {
                return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL_SCORES'}}};
        }
 }
 =head2 C<$kw->getFiles($regex);>
@ -571,16 +766,17 @@ sub nextFile
        return 0;
 }
-=head2 C<$kw->clearFiles();>
+=head2 C<$kw->clearFile();>
 Clear the current file reference queue.
 =cut
-sub clearFiles
+sub clearFile
 {
        my $self = shift;
-        delete($self->{'rules'}) || return "Failed to delete rules hash\n";
+        my $file = shift || $self->{'file'};
        delete($self->{'rules'}->{$file}) || return "Failed to delete rules hash\n";
        return 0;
 }
@ -757,48 +953,6 @@ sub readLine
        return ();
 }
 =head2 C<$kw->processMetas($outfile, $file);>
 Create all of the component meta rules for the declared C<$file>. Those that
 will be used for the standalone and count rules. Output to C<$outfile>. This
 must be run before the other process methods and must be run for 'GLOBAL' first,
 otherwise output will be invalid. Meta rules for file-specific words will not be
 generated if they are also in the 'GLOBAL' group, instead the meta rules from
 the 'GLOBAL' file will be used for the count rules in all other files. This
 will prevent duplicates, but also requires that you not rename output files such
 that the they appear before the 'GLOBAL' file (without '_C<$file>' at the end).
 =cut
 sub processMetas
 {
        my $self = shift;
        my $file = shift;
        my $rules = shift;
        my $prefix = $self->{'id'};
        my @words;
        if ($file eq 'GLOBAL') {
                @words = @{$rules->{'GLOBAL'}};
        } else {
                $prefix .= "_".$file;
                foreach (keys(%{$rules->{$file}->{'groups'}})) {
                        next if (grep {/^$_$/} @{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}});
                }
                foreach (keys(%{$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'words'}})) {
                        next if (grep {/^$_$/} @{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}});
                        next if (grep {/^$_$/} @words);
                        push (@words, $_);
                }
        }
        foreach my $word (@words) {
                $self->{'output'}->{$file} .=
                        "body\t__".$prefix."_".uc($word)."_BODY\t/\\b".$word."\\b/\n" .
                        "header\t__".$prefix."_".uc($word)."_SUBJ\tSubject =~ /\\b".$word."\\b/\n" .
                        "meta\t__".$prefix."_".uc($word)."\t( __".$prefix."_".uc($word)."_BODY || __".$prefix."_".uc($word)."_SUBJ )\n\n";
        }
 }
 =head2 C<$kw->getPrefix($file);>
@ -809,13 +963,13 @@ Return a standardized rule prefix using C<id> and C<file>.
 sub getPrefix
 {
        my $self = shift;
        my $file = shift || $self->getFile();
        my $id = $self->getId();
-        my $file = $self->getFile();
+        $file =~ s/(?:.*\/)*(.*)\.cf/$1/ if (defined($file));
-        $file =~ s/(?:.*\/)*(.*)\.cf/$1/;
+        return uc($id.(defined($file) ? "_$file" : ''));
        return uc($id."_".$file);
 }
-=head2 C<$kw->writeMetas($file);>
+=head2 C<$kw->generateMetas($file);>
 Write component rules for file in the current file, or one set by C<$file> to
 make them available to all other rule types.
@ -829,38 +983,41 @@ subject header, then a 'meta' rule to connect them.
 =cut
-sub writeMetas
+sub generateMetas
 {
        my $self = shift;
-        my $file = shift || $self->getFile();
+        my $words = shift;
        my $file = $self->getFile();
        my $prefix = $self->getPrefix();
        my $output = $self->getOutput();
        my $scoresoutput = $self->getScoresOutput();
        if ($self->{'debug'}) {
                print STDERR "Writing Metas for $file\n";
        }
        if ($self->{'singleOutfile'}) {
-                print(
+                push(@{$output}, 
-"############".('#'*length($self->{'rules'}->{$file}->{'PREFIX'}))."#\n".
+                        "############".('#'*length($prefix))."#\n".
-"# Metas for ".$self->{'rules'}->{$file}->{'PREFIX'}."\n".
+                        "# Metas for $prefix\n".
-"############".('#'*length($self->{'rules'}->{$file}->{'PREFIX'}))."#\n\n"
+                        "############".('#'*length($prefix))."#\n\n"
                );
        }
-        print("# ".$self->{'rules'}->{$file}->{'COMMENTS'}->{$_}."\n");
+        foreach my $word (keys(%{$words})) {
-        my %uniq = %{$self->{'rules'}->{$file}} || ();
+                push(@{$output}, "# ".$words->{$word}."\n") if ($words->{$word});
-        foreach my $group ( keys(%{$self->{'rules'}->{$file}}) ) {
+                push(@{$output},
-                next if ($group eq 'COMMENT' || $group eq 'SCORED');
+                        "body        __${prefix}_".uc($word).
-                foreach ( @{$self->{'rules'}->{$file}->{'SCORED'}} ) {
+                        "_BODY /\\b${word}\\b/\n",
-                        $uniq{$_} = 0 unless (defined($uniq{$_}));
+                        "header      __${prefix}_".uc($word).
-                }
+                        "_SUBJ Subject =~ /\\b${word}\\b/\n",
-        }
+                        "meta        __${prefix}_".uc($word).
-        my $output;
+                        " ( __${prefix}_".uc($word)."_BODY || __${prefix}_".
-        foreach ( sort(keys(%uniq)) ) {
+                        uc($word)."_SUBJ )\n\n"
-                if (defined($self->{'rules'}->{$file}->{'COMMENTS'}->{$_})) {
+                );
                        print("# ".$self->{'rules'}->{$file}->{'COMMENTS'}->{$_}."\n");
                }
                print("body        __".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_).'_BODY /\b'.$_.'\b/'."\n");
                print("header      __".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_).'_SUBJ Subject =~ /\b'.$_.'\b/'."\n");
                print("meta        __".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_)." ( __".$self->{'rules'}->{$file}->{'PREFIX'}."_BODY || __".$self->{'rules'}->{$file}->{'PREFIX'}."_SUBJ )\n\n");
        }
 }
-=head2 C<$kw->writeWords($file);>
+=head2 C<$kw->generateScored($file);>
 Write 'SCORED' word rules for file in the current file, or one set by C<$file>.
@ -872,88 +1029,96 @@ rule with a '__' prefix).
 =cut
-sub writeWords
+sub generateScored
 {
        my $self = shift;
        my $file = shift || $self->getFile();
        my $prefix = $self->getPrefix();
        my $output = $self->getOutput();
        my $scoreoutput = $self->getScoresOutput();
        if ($self->{'singleOutfile'}) {
-                print(
+                push(@{$output},
-"############".('#'*length($self->{'rules'}->{$file}->{'PREFIX'}))."#\n".
+                        "############".('#'*length($prefix))."#\n".
-"# Words for ".$self->{'rules'}->{$file}->{'PREFIX'}."\n".
+                        "# Scored words for $prefix\n".
-"############".('#'*length($self->{'rules'}->{$file}->{'PREFIX'}))."#\n\n"
+                        "############".('#'*length($prefix))."#\n\n"
                );
        }
-        foreach ( keys(%{$self->{'rules'}->{$file}->{'SCORED'}->{$_}}) ) {
+        foreach ( keys(%{$self->{'rules'}->{$file}->{'SCORED'}}) ) {
-                print("meta        ".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_).
+                push(@{$output}, "meta        ${prefix}_".uc($_).
-                        " ( __".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_)." )\n"
+                        " ( __${prefix}_".uc($_)." )\n"
                );
                if (defined($self->{'rules'}->{$file}->{'COMMENTS'}->{$_})) {
-                        print("describe    ".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_)." ".
+                        push(@{$scoreoutput}, "describe    ${prefix}_".uc($_)." ".
-                                $self->{'rules'}->{$file}->{'COMMENTS'}->{$_}."\n"
+                                $self->{'rules'}->{$file}->{'COMMENTS'}->{$_}.
                                "\n"
                        );
                }
-                print("score       ".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_)." ".
+                push(@{$scoreoutput}, 
                        "score       ${prefix}_".uc($_)." ".
                        $self->{'rules'}->{$file}->{'SCORED'}->{$_}."\n\n"
                );
        }
 }
-=head2 C<$kw->writeGroups($file);>
+=head2 C<$kw->generateGroups($file);>
 Write group rules for file in the current file, or one set by C<$file>. These
 are a list of 'meta' rules where the component rules are all of the 'meta's in
 the group for each of the available match counts.
-    meta        KW_FILE_GROUP_1 ( ( __KW_WORD1 + __KW_WORD2 ) >= 1 )
+    meta        KW_FILE_GROUP_1 ( __KW_WORD1 + __KW_WORD2 ) >= 1
    describe    KW_FILE_GROUP_1 Found 1 word(s) from GROUP
    score       KW_FILE_GROUP_1 0.01
-    meta        KW_FILE_GROUP_2 ( ( __KW_WORD1 + __KW_WORD2 ) >= 2 )
+    meta        KW_FILE_GROUP_2 ( __KW_WORD1 + __KW_WORD2 ) >= 2
    describe    KW_FILE_GROUP_2 Found 2 word(s) from GROUP
    score       KW_FILE_GROUP_2 0.01
 =cut
-sub writeGroups
+sub generateGroups
 {
        my $self = shift;
        my $file = shift || $self->getFile();
        my $prefix = $self->getPrefix();
-        my $prefix = $self->{'rules'}->{$file}->{'PREFIX'};
+        my $output = $self->getOutput();
        my $scoreoutput = $self->getScoresOutput();
        if ($self->{'singleOutfile'}) {
-                print(
+                push(@{$output},
-"#############".('#'*length($prefix))."#\n".
+                        "#############".('#'*length($prefix))."#\n".
-"# Groups for ".$prefix."\n".
+                        "# Groups for $prefix\n".
-"#############".('#'*length($prefix))."#\n\n"
+                        "#############".('#'*length($prefix))."#\n\n"
                );
        }
        foreach my $group ( keys(%{$self->{'rules'}->{$file}}) ) {
-                next if ( $group eq 'COMMENT' || $group eq 'SCORED' );
+                next if ( $group eq 'COMMENTS' || $group eq 'SCORED' );
-                print("# $group\n");
+                push(@{$output}, "# $group\n");
                my $gprefix = $prefix;
                unless ($group eq 'LOCAL') {
-                        $gprefix .= $group;
+                        $gprefix .= '_'.$group;
                }
-                my $start = "meta        ".$gprefix."_";
+                my $start = "meta        ${gprefix}_";
-                my $words = " ( ( ";
+                my $words = " ( ";
-                foreach my $word ( keys(%{$self->{'rules'}->{$file}->{$group}}) ) {
+                foreach my $word ( @{$self->{'rules'}->{$file}->{$group}} ) {
-                        $words .= "__".$prefix."_".$word." + ";
+                        $words .= "__${prefix}_".uc($word)." + ";
                }
                $words =~ s/\+ $/\) >= /;
                for (my $i = 1; $i <= scalar(@{$self->{'rules'}->{$file}->{$group}}); $i++) {
-                        my $line = $start.$i.$words.$i." )\n";
+                        push(@{$output}, $start.$i.$words.$i."\n\n");
-                        print($line."\n".
+                        push(@{$scoreoutput},
-                                "describe    ".$gprefix."_".$i." Found $i GLOBAL word(s) from ".$prefix."\n"
+                                "describe    ${gprefix}_$i Found $i $group word"
                                . ($i>1 ? 's' : '')." from $prefix\n",
                                "score       ${gprefix}_$i 0.01\n\n"
                        );
                        print("score       ".$gprefix."_".$i. " 0.01\n\n");
                }
        }
 }
-=head2 C<$kw->writeGlobals();>
+=head2 C<$kw->generateGlobals();>
-Write 'GLOBAL' group rules. Similar to C<writeGroups> except that component
+Write 'GLOBAL' group rules. Similar to C<generateGroups> except that component
 rules must be included from external files.
    meta        KW_1 ( ( __KW_FILE1_WORD + __KW_FILE2_WORD ) >= 1 )
@ -966,38 +1131,41 @@ rules must be included from external files.
 =cut
-sub writeGlobals
+sub generateGlobals
 {
        my $self = shift;
-        my $outfile = $self->getGlobalOutfile();
+        $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'}));
-        print(
+        $self->setGlobalScoresOutfile() unless (defined($self->{'filemap'}->{'GLOBAL_SCORES'}));
-"##########\n".
+        my $output = $self->getGlobalOutput();
-"# Globals\n".
+        my $scoreoutput = $self->getGlobalScoresOutput();
-"##########\n\n"
+        if ($self->{'singleOutfile'}) {
-        );
+                push(@{$output},
-        my $prefix = $self->getId();
+                        "##########\n".
-        my $start = "meta        ".$prefix."_";
+                        "# Globals\n".
-        my $words = " ( ( ";
+                        "##########\n\n"
                );
        }
        $self->setFile(undef);
        my $gprefix = $self->getPrefix();
        my $start = "meta        ${gprefix}_";
        my $words = " ( ";
        my $prefix;
        foreach my $word ( keys(%{$self->{'rules'}->{'GLOBAL'}}) ) {
-                $words .= "__".$self->{'rules'}->{'GLOBAL'}->{$word}."_".$word." + ";
+                $prefix = $self->getPrefix($self->{'rules'}->{'GLOBAL'}->{$word});
                $words .= "__${prefix}_" . uc($word) . " + ";
        }
        $words =~ s/\+ $/\) >= /;
-        for (my $i = 1; $i <= scalar(@{$self->{'rules'}->{'GLOBAL'}}); $i++) {
+        for (my $i = 1; $i <= scalar(keys(%{$self->{'rules'}->{'GLOBAL'}})); $i++) {
-                my $line = $start.$i.$words.$i." )\n";
+                my $line = $start.$i.$words.$i."\n";
-                print($line."\n".
+                push(@{$output}, $line.($self->{'joinScores'} ? '' : "\n"));
-                        "describe    ".$prefix."_".$i." Found $i GLOBAL word(s) from ".$prefix."\n"
+                push(@{$scoreoutput}, "describe    ${gprefix}_${i} Found $i ".
                        "GLOBAL word(s) from ${gprefix}\n",
                        "score       ${gprefix}_${i} 0.01\n\n"
                );
                print("score       ".$prefix."_".$i. " 0.01\n\n");
        }
 }
 sub writeScores
 {
        my $self = shift;
        my $path = shift;
 }
 sub getDir
 {
        my $self = shift;
@ -1009,7 +1177,9 @@ sub setDir
 {
        my $self = shift;
        my $dir = shift || ("$ENV{'PWD'}/$self->{'id'}");
-        $self->{'dir'} = $dir;
+        $dir =~ m/([^\0]+)/;
        $self->{'dir'} = $1;
        return undef;
 }
 sub createDir
@ -1025,19 +1195,25 @@ sub createDir
 sub cleanDir
 {
        my $self = shift;
-        my @files = @_ || (keys(%{$self->{'filemap'}}));
+        my @files = @_;
        unless (scalar(@files)) {
                my %uniq;
                foreach (keys(%{$self->{'filemap'}})) {
                        $uniq{$_} = 1;
                }
                @files = keys(%uniq);
        }
        foreach my $file (@files) {
                if (-e $self->{'filemap'}->{$file}) {
                        if ($self->{'debug'}) {
                                print STDERR "Removing old file ".$self->{'filemap'}->{$file}."\n";
                                print "Removing old file ".$self->{'filemap'}->{$file}."\n";
                                #unlink($self->{'dir'}.'/'.$self->{'filemap'}->{'file'}) || die "Output file '".$self->{'filemap'}->{$file}."' already exists and could not be deleted\n";
                        }
                        unlink($self->{'filemap'}->{$file}) || die "Output file '".$self->{'filemap'}->{$file}."' exists and could not be deleted\n";
                }
        }
 }
-=head2 C<$kw->writeAll($dir)>
+=head2 C<$kw->generateAll($dir)>
 Output files will use the name of each input file, stripping any extension, and
 forcing the name to uppercase. Rules in each file will be called:
@ -1060,7 +1236,7 @@ body for that word.
 =cut
-sub writeAll
+sub generateAll
 {
        my $self = shift;
        my @written;
@ -1070,19 +1246,62 @@ sub writeAll
        foreach my $file (keys(%{$self->{'rules'}})) {
                # Reserve GLOBAL for last
-                if ($file eq 'GLOBAL') {
+                next if ($file eq 'GLOBAL');
                        next;
                }
                $self->setFile($file);
-                $self->writeMetas();
+                my %all;
-                $self->writeWords();
+                foreach my $group (keys(%{$self->{'rules'}->{$file}})) {
-                $self->writeGroups();
+                        if ($group eq 'SCORED' || $group eq 'COMMENTS') {
-                $self->writeScores();
+                                next;
                        } else {
                                foreach my $word (@{$self->{'rules'}->{$file}->{$group}}) {
                                        $all{$word} = $self->{'rules'}->{$file}->{'COMMENTS'}->{$word} || '';
                                }
                        }
                }
                $self->generateMetas(\%all);
                $self->generateGroups();
                $self->generateScored() unless ($self->{joinScores});
        }
-        $self->writeGlobals();
+        $self->generateGlobals();
        return 0;
 }
 =head2 C<$kw->writeFile($file)>
 =cut
 sub writeFile
 {
        my $self = shift;
        my $file = shift || return "Requires filename";
        return "Nothing generated for $file" unless (defined($self->{'output'}->{$file}));
        if (open(my $fh, ">", $file)) {
                foreach my $line (@{$self->{'output'}->{$file}}) {
                        print $fh $line;
                }
                close($fh);
        } else {
                return "Could not open $file for writing";
        }
 }
 =head2 C<$kw->writeAll()>
 =cut
 sub writeAll
 {
        my $self = shift;
        $self->generateAll() unless (defined($self->{'output'}));
        my @errors;
        foreach my $file (keys(%{$self->{'output'}})) {
                my $ret = $self->writeFile($file);
                push(@errors, $ret) if (defined($ret));
        }
        return ("Errors:\n ".join("\n ", @errors)) if (scalar(@errors));
 }
 =head1 MORE
 For discussion of the module and examples, see:
@ -1120,15 +1339,3 @@ E<lt>https://git.john.me.tz/jpm/Mail-SpamAssassin-KeywordRuleGeneratorE<gt>
 =cut
 1;
 =pod
 die "Please provide rules file(s) as an argument\n" unless (defined($ARGV[0]));
 my @files;
 my $files_ref = \@files;
 getFiles($files_ref, @ARGV);
 my %keywords;
 my $key_ref = \%keywords;
 readFiles($key_ref, @files);
 =cut
--- a/t/04_rules.t
+++ b/t/04_rules.t
@ -20,26 +20,18 @@ my ( $missing, $extra, $incorrect ) = 0;
 use Data::Dump;
 foreach my $file (keys(%{$expected})) {
        if ($file eq 'GLOBAL') {
-                my ($m, $e) = compareGroups(
+                my ($m, $e) = compareValues(
                        $expected->{$file},
                        $kw->{'rules'}->{$file}
                );
-                print("Missing $m words\n") if ($m);
+                print("Missing $m words in GLOBAL\n") if ($m);
-                print("$e extra words\n") if ($e);
+                print("$e extra words in GLOBAL\n") if ($e);
                $missing += $m;
                $extra += $e;
        } else {
                foreach my $group (keys(%{$expected->{$file}})) {
-                        if ($group eq 'SCORED') {
+                        if ($group eq 'SCORED' || $group eq 'COMMENTS') {
-                                my ($m, $e, $i) = compareScores(
+                                my ($m, $e, $i) = compareValues(
                                        $expected->{$file}->{$group},
                                        $kw->{'rules'}->{$file}->{$group}
                                );
                                $missing += $m;
                                $extra += $e;
                                $incorrect += $i;
                        } elsif ($group eq 'COMMENTS') {
                                my ($m, $e, $i) = compareScores(
                                        $expected->{$file}->{$group},
                                        $kw->{'rules'}->{$file}->{$group}
                                );
@ -47,7 +39,7 @@ foreach my $file (keys(%{$expected})) {
                                $extra += $e;
                                $incorrect += $i;
                        } else {
-                                my ($m, $e) = compareGroups(
+                                my ($m, $e) = compareLists(
                                        $expected->{$file}->{$group},
                                        $kw->{'rules'}->{$file}->{$group}
                                );
@ -64,7 +56,7 @@ ok ($incorrect == 0, "No incorrect scores are found");
 done_testing();
-sub compareGroups
+sub compareLists
 {
        my $expect = shift;
        my $loaded = shift;
@ -75,7 +67,7 @@ sub compareGroups
        while (scalar(@e)) {
                unless (scalar(@l)) {
-                        print("extra words @l\n");
+                        print("extra words @e\n");
                        $extra += scalar(@e);
                        last;
                }
@ -106,7 +98,7 @@ sub compareGroups
        return ($missing, $extra);
 }
-sub compareScores
+sub compareValues
 {
        my $expect = shift;
        my $loaded = shift;
@ -114,11 +106,11 @@ sub compareScores
        my %remaining = %{$loaded};
        my ($missing, $extra, $incorrect) = (0, 0, 0);
        foreach my $word (keys(%$expect)) {
-                if (!defined($loaded->{$word})) {
+                if (!defined($loaded->{$word}) && defined($expect->{$word}) && $expect->{$word} != 0) {
-                        print("Missing score assignment for $word\n");
+                        print("Missing value assignment for $word\n");
                        $missing++;
                } elsif ($expect->{$word} != $loaded->{$word}) {
-                        print("Incorrect score assignment for $word\n");
+                        print("Incorrect value assignment for $word\n");
                        $incorrect++;
                } else {
                        delete($remaining{$word});
@ -133,39 +125,39 @@ sub compareScores
 sub getExpected
 {
        my %expected = (
-                'GLOBAL' => [
+                'GLOBAL' => {
-                        'lorem',
+                        'lorem' => 't/04_rules0.cf',
-                        'ipsum',
+                        'ipsum' => 't/04_rules0.cf',
-                        'dolor',
+                        'dolor' => 't/04_rules0.cf',
-                        'sit',
+                        'sit' => 't/04_rules0.cf',
-                        'amet',
+                        'amet' => 't/04_rules0.cf',
-                        'consectetur',
+                        'consectetur' => 't/04_rules0.cf',
-                        'adipiscing',
+                        'adipiscing' => 't/04_rules0.cf',
-                        'elit',
+                        'elit' => 't/04_rules0.cf',
-                        'sed',
+                        'sed' => 't/04_rules0.cf',
-                        'do',
+                        'do' => 't/04_rules0.cf',
-                        'eiusmod',
+                        'eiusmod' => 't/04_rules0.cf',
-                        'tempor',
+                        'tempor' => 't/04_rules0.cf',
-                        'minim',
+                        'minim' => 't/04_rules0.cf',
-                        'veniam',
+                        'veniam' => 't/04_rules0.cf',
-                        'quis',
+                        'quis' => 't/04_rules0.cf',
-                        'nostrud',
+                        'nostrud' => 't/04_rules1.cf',
-                        'exercitation',
+                        'exercitation' => 't/04_rules1.cf',
-                        'ullamco',
+                        'ullamco' => 't/04_rules1.cf',
-                        'laboris',
+                        'laboris' => 't/04_rules1.cf',
-                        'nisi',
+                        'nisi' => 't/04_rules1.cf',
-                        'ut',
+                        'ut' => 't/04_rules1.cf',
-                        'aliquip',
+                        'aliquip' => 't/04_rules1.cf',
-                        'ex',
+                        'ex' => 't/04_rules1.cf',
-                        'ea',
+                        'ea' => 't/04_rules1.cf',
-                        'commodo',
+                        'commodo' => 't/04_rules1.cf',
-                        'consequat',
+                        'consequat' => 't/04_rules1.cf',
-                        'duis',
+                        'duis' => 't/04_rules1.cf',
-                        'dolore',
+                        'dolore' => 't/04_rules1.cf',
-                        'eu',
+                        'eu' => 't/04_rules1.cf',
-                        'fugiat',
+                        'fugiat' => 't/04_rules1.cf',
-                ],
+                },
-                '50_04_T_04_RULES0.cf' => {
+                't/04_rules0.cf' => {
                        'SCORED' => {
                                'lorem' => 1,
                                'sit' => 1,
@ -219,7 +211,7 @@ sub getExpected
                                'veniam',
                                'quis',
                        ],
-                        'group' => [
+                        'GROUP' => [
                                'lorem',
                                'ipsum',
                                'dolor',
@ -234,7 +226,7 @@ sub getExpected
                                'ad',
                        ]
                },
-                '50_04_T_04_RULES1.cf' => {
+                't/04_rules1.cf' => {
                        'SCORED' => {
                                'nostrud' => 1,
                                'laboris' => 1,
@ -262,7 +254,7 @@ sub getExpected
                                'eu',
                                'fugiat',
                        ],
-                        'group' => [
+                        'GROUP' => [
                                'nostrud',
                                'exercitation',
                                'ullamco',
--- a/t/05_write.t
+++ b/t/05_write.t
@ -0,0 +1,57 @@
 #!/usr/bin/perl
 # Basic test to ensure files can be found and read. Verification will come later
 use Test::More;
 use lib 'lib/';
 use Mail::SpamAssassin::KeywordRuleGenerator;
 my $id = '05';
 my $kw = Mail::SpamAssassin::KeywordRuleGenerator->new( { 'id' => T.$id, 'debug' => 0, 'joinScores' => 0 } );
 my $testdir = $ENV{'PWD'}.'/t/'.T.$id;
 ok (!$kw->createDir($testdir), "Created output directory: '$ENV{'PWD'}/t/$id'");
 $kw->setDir($testdir);
 ok ($kw->getDir() eq $testdir, "Set output directory: '$ENV{'PWD'}/t/$id'");
 my @files = ( 't/04_rules0.cf', 't/04_rules1.cf' );
 my @failed = @{$kw->readAll( @files )};
 ok(!scalar(@failed), "Load 'rules' hash with readAll");
 $kw->writeAll();
 my @files = glob($testdir.'/*.cf');
 ok (scalar(@files) == 6, "Correct number of files generated");
 my %expected = (
        '50_T05_T_04_RULES0.cf' => 169,
        '50_T05_T_04_RULES0_SCORES.cf' => 105,
        '50_T05_T_04_RULES1.cf' => 148,
        '50_T05_T_04_RULES1_SCORES.cf' => 97,
        '51_T05.cf' => 60,
        '51_T05_SCORES.cf' => 90
 );
 my %remaining = %expected;
 foreach (@files) {
        $e = $_;
        $e =~ s/^(.*\/)?t\/T05\///;
        if (open(my $fh, "<", $_)) {
                while (<$fh>) {
                        $expected{$e}--;
                }
                close($fh);
        }
        ok ($expected{$e} == 0, "Correct number of lines found in $e");
        delete($remaining{$e});
 }
 ok (!scalar(keys(%remaining)), "All expected output files found");
 use Mail::SpamAssassin;
 my $sa = Mail::SpamAssassin->new( { 'site_rules_filename' => $testdir } );
 $failed = $sa->lint_rules();
 ok (!$failed, "Verified by spamassassin".($res ? "\n$failed" : ""));
 $kw->cleanDir();
 done_testing();
--- a/t/T05/sa_plugins.pre
+++ b/t/T05/sa_plugins.pre
@ -0,0 +1 @@
 loadplugin Mail::SpamAssassin::Plugin::Check
		`@ -0,0 +1 @@`
							`loadplugin Mail::SpamAssassin::Plugin::Check`