Significant progress to successful writes

This commit is contained in:
John Mertz 2022-11-29 22:22:33 -05:00
parent c0b635c15c
commit acc4250cd8
Signed by: jpm
GPG Key ID: E9C5EA2D867501AB
1 changed files with 362 additions and 122 deletions

View File

@ -31,7 +31,7 @@ Implemented as a module largely for testing purposes.
use Mail::SpamAssassin::KeywordRuleGenerator; use Mail::SpamAssassin::KeywordRuleGenerator;
my $kw = Mail::SpamAssassin::KeywordRuleGenerator->new($id); my $kw = Mail::SpamAssassin::KeywordRuleGenerator->new($id);
$kw->readFile('keywords.cf'); $kw->readFile('keywords.cf');
$kw->writeFiles(); $kw->writeAll();
=head1 DESCRIPTION =head1 DESCRIPTION
@ -62,7 +62,7 @@ method. By default, the output file name and the rules therein will use a
stripped and capitalized version of those filenames. stripped and capitalized version of those filenames.
$kw->readFiles( 'example.txt' ); $kw->readFiles( 'example.txt' );
$kw->writeFiles(); $kw->writeAll();
This will creates rules formatted like: This will creates rules formatted like:
@ -72,7 +72,7 @@ and will output to the file:
70_id_example.cf 70_id_example.cf
See the C<writeFile> method for more information on this formatting. Also see See the C<write*> methods for more information on this formatting. Also see
the C<new> method for discussion of the 'id'. the C<new> method for discussion of the 'id'.
Finally, a like file: Finally, a like file:
@ -134,20 +134,21 @@ our %EXPORT_TAGS = (
'setPriority', 'setPriority',
'getOutfile', 'getOutfile',
'setOutfile', 'setOutfile',
'getScorefile', 'getGlobalOutfile',
'setScorefile', 'setGlobalOutfile',
'scorefile',
'clearFiles', 'clearFiles',
'getFiles', 'getFiles',
'nextFile', 'nextFile',
'readFile', 'readFile',
'getFile', 'getFile',
'setFile', 'setFile',
'joinRules', 'createDir',
'processMetas', 'cleanDir',
'processWords', 'writeMetas',
'processGroups', 'writeWords',
'processAll', 'writeGroups',
'writeFile', 'writeGlobals',
'writeAll' 'writeAll'
] ]
); );
@ -358,16 +359,19 @@ sub setFile
=head2 C<$kw->getOutfile()> =head2 C<$kw->getOutfile()>
Getter for C<$kw->{'outfile'}>. 'outfile' represents the real filepath of the Getter for C<$kw->{'file'}>. 'file' represents the real filepath of the
output file which is currently being processed. output file. With a single argument, the output file for that input file will be
returned. Without an argument, the current working output file will be returned,
if available).
=cut =cut
sub getOutfile sub getOutfile
{ {
my $self = shift; my $self = shift;
my $file = shift || $self->{'file'};
if (defined($self->{'outfile'})) { if (defined($self->{'filemap'}->{$file})) {
return $self->{'outfile'}; return $self->{'outfile'};
} }
} }
@ -385,10 +389,10 @@ sub setOutfile
my $path = shift; my $path = shift;
if (defined($path)) { if (defined($path)) {
$self->{'outfile'} = $path || return "Failed to set $path"; $self->{'filemap'}->{$self->{'file'}} = $path || return "Failed to set $path";
} else { } else {
if ($self->{'singleOutfile'}) { if ($self->{'singleOutfile'}) {
$self->{'outfile'} = $self->{'priority'} . $self->{'filemap'}->{$self->{'file'}} = $self->{'dir'}."/".$self->{'priority'} .
'_' . uc($self->{'id'}) . '_' . uc($self->{'id'}) .
'.cf'; '.cf';
} else { } else {
@ -397,7 +401,7 @@ sub setOutfile
$file =~ s/(\.[^\.]*)$//g; # Remove extensions $file =~ s/(\.[^\.]*)$//g; # Remove extensions
$file = uc($file); # Convert to uppercase for rule names $file = uc($file); # Convert to uppercase for rule names
$self->{'outfile'} = $self->{'priority'} . $self->{'filemap'}->{$self->{'file'}} = $self->{'dir'}."/".$self->{'priority'} .
'_' . uc($self->{'id'}) . '_' . uc($self->{'id'}) .
'_' . $file . '_' . $file .
'.cf'; '.cf';
@ -405,6 +409,109 @@ sub setOutfile
} }
} }
=head2 C<$kw->getGlobalOutfile();>
Return the full path of the output file used for GLOBAL rules. If it is not yet
defined, then C<setGlobalOutfile()> will be run first to try to set it. If this
fails, then nothing will be returned.
=cut
sub getGlobalOutfile
{
my $self = shift;
return $self->{'filemap'}->{'GLOBAL'} if (defined($self->{'filemap'}->{'GLOBAL'}));
my $ret = $self->setGlobalOutfile();
return $self->{'filemap'}->{'GLOBAL'} unless ($ret);
}
=head2 C<$kw->setGlobalOutfile($file);>
Set the output file for global rules. This file must be either the same or
alphabetically after the last file with 'meta' rules. C<$file> can be used to
bypass that check, but it might lead to rules that do not work.
Will try to select a filename as close to the existing output files as possible.
First it will simply duplicate a name if it is in C<singleOutput> mode. Then,
it will try to use the base name without the C<$file> portion. This will
generally not work because the '_' before the file will come after the dot
without:
99_KW.cf
99_KW_FILE.cf
99_KW_FILE_SCORES.cf
Next, if the priority is less than 99, it will simply increment that. Finally,
if will try to double the first '_':
99__KW.cf
If none of these techniques work, it will return an error.
=cut
sub setGlobalOutfile
{
my $self = shift;
my $path = shift;
if (defined($path)) {
$self->{'filename'}->{'GLOBAL'} = $self->{'filemap'}->{(keys(%{$self->{'filemap'}}))[0]};
return undef;
}
if ($self->{'singleOutfile'}) {
$self->{'filename'}->{'GLOBAL'} = $self->{'filemap'}->{(keys(%{$self->{'filemap'}}))[0]};
return undef;
}
my $last;
foreach my $file (keys(%{$self->{'filemap'}})) {
$last = $file if (!defined($last) || $file gt $last);
}
my $file = $self->getDir().'/'.$self->getPriority()."_".$self->getId().".cf";
if ($file gt $last) {
$self->{'filename'}->{'GLOBAL'} = $file;
return undef;
}
if ($self->getPriority < 99) {
$self->{'filemap'}->{'GLOBAL'} = $self->getDir().'/'.($self->getPriority()+1)."_".$self->getId().".cf";
return undef;
}
$file =~ s/_/__/ unless ($file gt $last);
if ($file gt $last) {
$self->{'filename'}->{'GLOBAL'} = $file;
return undef;
}
return ("Cannot determine a valid GLOBAL output file\n");
}
=head2 C<$kw->scorefile($file);>
Return the file to output scores for the current C<$file> or the one provided.
A provided C<$file> will be checked against both the keys and values of the
filemap.
This will simply insert '_SCORES' just before the '.cf' extension.
=cut
sub scorefile
{
my $self = shift;
my $file = shift || $self->getFile();
my $scorefile = $self->getOutfile($file);
unless (defined($scorefile)) {
foreach (keys(%{$self->{'filemap'}})) {
if ($self->{'filemap'}->{$_} eq $file) {
$scorefile = $file;
last;
}
}
}
$scorefile =~ s/\.cf$/_SCORES.cf/;
return $scorefile if ($scorefile =~ m/_SCORES\.cf$/);
}
=head2 C<$kw->getFiles($regex);> =head2 C<$kw->getFiles($regex);>
Simple recursive search for files within a directory. Will validate that each Simple recursive search for files within a directory. Will validate that each
@ -513,10 +620,11 @@ sub readFile
my $self = shift; my $self = shift;
my $file = shift || return 'No file provided'; my $file = shift || return 'No file provided';
my %args = @_; my %args = @_;
$self->getDir();
$self->{'file'} = $file; $self->{'file'} = $file;
$self->setOutfile(); $self->setOutfile();
my $n = 0;
my $n = 0;
if (open(my $fh, '<', $file)) { if (open(my $fh, '<', $file)) {
my $rules = 0; my $rules = 0;
while (<$fh>) { while (<$fh>) {
@ -530,20 +638,22 @@ sub readFile
print "FOUND: '$word' '$score' " . (join(',',@groups)) . "\n"; print "FOUND: '$word' '$score' " . (join(',',@groups)) . "\n";
} }
foreach my $group (@groups) { foreach my $group (@groups) {
# Global rules must references the file where the component rule is located
if ($group eq 'GLOBAL') { if ($group eq 'GLOBAL') {
push(@{$self->{'rules'}->{'GLOBAL'}}, $word); $self->{'rules'}->{'GLOBAL'}->{$word} = $self->{'file'};
# Local rules have enough information from context to determine the component rule
} else { } else {
push(@{$self->{'rules'}->{$self->{'outfile'}}->{$group}}, $word); push(@{$self->{'rules'}->{$self->{'file'}}->{$group}}, $word);
} }
} }
$self->{'rules'}->{$self->{'outfile'}}->{'SCORED'}->{$word} = $score if ($score); $self->{'rules'}->{$self->{'file'}}->{'SCORED'}->{$word} = $score if ($score);
$self->{'rules'}->{$self->{'outfile'}}->{'COMMENTS'}->{$word} = $comment if ($comment); $self->{'rules'}->{$self->{'file'}}->{'COMMENTS'}->{$word} = $comment if ($comment);
} elsif ($self->{debug}) { } elsif ($self->{debug}) {
print STDERR "ERROR: Invalid input in $file, line $n: $_\n"; print STDERR "ERROR: Invalid input in $file, line $n: $_\n";
} }
} }
print STDERR "No rules found in $file\n" unless ($self->{'rules'}->{$self->{'outfile'}} || !$self->{'debug'}); print STDERR "No rules found in $file\n" unless ($self->{'rules'}->{$self->{'file'}} || !$self->{'debug'});
return "No rules found in $file\n" unless ($self->{'rules'}->{$self->{'outfile'}}); return "No rules found in $file\n" unless ($self->{'rules'}->{$self->{'file'}});
} else { } else {
delete($self->{'file'}); delete($self->{'file'});
return "Failed to read $file"; return "Failed to read $file";
@ -623,7 +733,7 @@ sub readLine
} elsif (defined($word) && $section =~ m/^(\d+(?:\.\d+)?)$/ && !defined($score)) { } elsif (defined($word) && $section =~ m/^(\d+(?:\.\d+)?)$/ && !defined($score)) {
$score = $section; $score = $section;
} elsif (defined($word) && $section =~ m/^([^\d\s#]+)$/) { } elsif (defined($word) && $section =~ m/^([^\d\s#]+)$/) {
push(@groups, $section); push(@groups, uc($section));
} elsif (defined($word) && $section =~ m/^#.*$/ && !defined($comment)) { } elsif (defined($word) && $section =~ m/^#.*$/ && !defined($comment)) {
$comment = $section; $comment = $section;
$comment =~ s/^#\s*//; $comment =~ s/^#\s*//;
@ -647,62 +757,6 @@ sub readLine
return (); return ();
} }
=head2 C<$kw->joinRules(%rules)>
Merge rules hash into working output hash. Generally called from C<readFile>,
but if you want to add rules manually, it will require the following format:
{
'word' => {
'score' => 0,
'groups' => ( 'LOCAL' )
},
'other' => {
'score' => 1,
'groups' => ( 'GLOBAL', 'group' )
}
}
The main context object will provide the working 'ID', 'FILE' and other
attributes necessary to nest these rules in the existing hash.
=cut
sub joinRules
{
my $self = shift;
my %rules = @_;
if ($self->{'unified'}) {
$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'out'} = $self->{'priority'}.'_'.$self->{'id'}.'.cf';
} else {
$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'out'} = $self->{'priority'}.'_'.$self->{'id'}.'_'.$self->{'file'}.'.cf';
}
foreach my $word (keys(%rules)) {
my $score = $rules{$word}{'score'} || 0;
my @groups = $rules{$word}{'groups'} || ( 'GLOBAL' );
if ($score) {
$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'words'}->{$word} = $score;
}
foreach my $group (@groups) {
if ($group eq 'GLOBAL') {
if (scalar(keys(%{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}}))) {
push(@{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}}, $word);
} else {
$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'} = ( $word );
}
next;
}
if (scalar(keys(%{$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'groups'}->{$group}}))) {
push(@{$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'groups'}->{$group}}, $word);
} else {
$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'groups'}->{$group} = ( $word );
}
}
}
return 0;
}
=head2 C<$kw->processMetas($outfile, $file);> =head2 C<$kw->processMetas($outfile, $file);>
Create all of the component meta rules for the declared C<$file>. Those that Create all of the component meta rules for the declared C<$file>. Those that
@ -719,7 +773,6 @@ that the they appear before the 'GLOBAL' file (without '_C<$file>' at the end).
sub processMetas sub processMetas
{ {
my $self = shift; my $self = shift;
my $outfile = shift;
my $file = shift; my $file = shift;
my $rules = shift; my $rules = shift;
@ -739,7 +792,7 @@ sub processMetas
} }
} }
foreach my $word (@words) { foreach my $word (@words) {
$self->{'output'}->{$outfile} .= $self->{'output'}->{$file} .=
"body\t__".$prefix."_".uc($word)."_BODY\t/\\b".$word."\\b/\n" . "body\t__".$prefix."_".uc($word)."_BODY\t/\\b".$word."\\b/\n" .
"header\t__".$prefix."_".uc($word)."_SUBJ\tSubject =~ /\\b".$word."\\b/\n" . "header\t__".$prefix."_".uc($word)."_SUBJ\tSubject =~ /\\b".$word."\\b/\n" .
"meta\t__".$prefix."_".uc($word)."\t( __".$prefix."_".uc($word)."_BODY || __".$prefix."_".uc($word)."_SUBJ )\n\n"; "meta\t__".$prefix."_".uc($word)."\t( __".$prefix."_".uc($word)."_BODY || __".$prefix."_".uc($word)."_SUBJ )\n\n";
@ -747,61 +800,244 @@ sub processMetas
} }
=head2 C<$kw->processWords($outfile,%args);> =head2 C<$kw->getPrefix($file);>
Take a list of words with scores and add them to C<$outfile>. Return a standardized rule prefix using C<id> and C<file>.
=cut =cut
sub processWords sub getPrefix
{ {
my $self = shift; my $self = shift;
my $out = shift; my $id = $self->getId();
my $file = $self->getFile();
$file =~ s/(?:.*\/)*(.*)\.cf/$1/;
return uc($id."_".$file);
} }
=head2 C<$kw->processGroup(%args);> =head2 C<$kw->writeMetas($file);>
Take a single group, including 'GLOBAL' and add it to C<$outfile>. Write component rules for file in the current file, or one set by C<$file> to
make them available to all other rule types.
These are a match for each word across all groups and 'SCORED' in the body and
subject header, then a 'meta' rule to connect them.
body __KW_FILE_WORD_BODY /\bword\b/
header __KW_FILE_WORD_SUBJ Subject =~ /\bword\b/
meta __KW_FILE_WORD ( __KW_FILE_WORD_BODY || __KW_FILE_WORD_SUBJ )
=cut =cut
sub processGroup sub writeMetas
{ {
my $self = shift; my $self = shift;
my $outfile = shift; my $file = shift || $self->getFile();
for (my $i = 0; $i < scalar(@all); $i++) { if ($self->{'singleOutfile'}) {
$files->{$self->{'priority'}.'_'.$self->{'id'}.'cf'} .= print(
"meta\t".$self->{'id'}."_".$i."\t( ".join(' + ',@all)." ) > $i\n" . "############".('#'*length($self->{'rules'}->{$file}->{'PREFIX'}))."#\n".
"describe\tMatched ".($i+1)."of keywords: ".join(', ',@{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}})."\n\n"; "# Metas for ".$self->{'rules'}->{$file}->{'PREFIX'}."\n".
"############".('#'*length($self->{'rules'}->{$file}->{'PREFIX'}))."#\n\n"
);
}
print("# ".$self->{'rules'}->{$file}->{'COMMENTS'}->{$_}."\n");
my %uniq = %{$self->{'rules'}->{$file}} || ();
foreach my $group ( keys(%{$self->{'rules'}->{$file}}) ) {
next if ($group eq 'COMMENT' || $group eq 'SCORED');
foreach ( @{$self->{'rules'}->{$file}->{'SCORED'}} ) {
$uniq{$_} = 0 unless (defined($uniq{$_}));
}
}
my $output;
foreach ( sort(keys(%uniq)) ) {
if (defined($self->{'rules'}->{$file}->{'COMMENTS'}->{$_})) {
print("# ".$self->{'rules'}->{$file}->{'COMMENTS'}->{$_}."\n");
}
print("body __".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_).'_BODY /\b'.$_.'\b/'."\n");
print("header __".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_).'_SUBJ Subject =~ /\b'.$_.'\b/'."\n");
print("meta __".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_)." ( __".$self->{'rules'}->{$file}->{'PREFIX'}."_BODY || __".$self->{'rules'}->{$file}->{'PREFIX'}."_SUBJ )\n\n");
} }
} }
=head2 C<$kw->processAll(%args);> =head2 C<$kw->writeWords($file);>
Process all groups. Write 'SCORED' word rules for file in the current file, or one set by C<$file>.
These are simply a 'meta' rule for only the existing component rule (the same
rule with a '__' prefix).
meta KW_FILE_WORD ( __KW_FILE_WORD )
score KW_FILE_WORD 1.0
=cut =cut
sub processAll sub writeWords
{ {
my $self = shift; my $self = shift;
my %args = @_; my $file = shift || $self->getFile();
$self->{'output'} = {}; if ($self->{'singleOutfile'}) {
foreach my $id (keys(%{$self->{'rules'}})) { print(
$self->processMetas($self->{'priority'}.'_'.$id.'.cf','GLOBAL',$self->{'rules'}->{$id}); "############".('#'*length($self->{'rules'}->{$file}->{'PREFIX'}))."#\n".
$self->processGroups($self->{'priority'}.'_'.$id.'.cf','GLOBAL',$self->{'rules'}->{$id}); "# Words for ".$self->{'rules'}->{$file}->{'PREFIX'}."\n".
$self->processWords($self->{'priority'}.'_'.$id.'.cf','GLOBAL',$self->{'rules'}->{$id}); "############".('#'*length($self->{'rules'}->{$file}->{'PREFIX'}))."#\n\n"
foreach my $file (keys(%{$self->{'rules'}->{$id}})) { );
$self->processMetas($self->{'priority'}.'_'.$id.'_'.$file.'.cf','GLOBAL',$self->{'rules'}->{$id}); }
$self->processGroups($self->{'priority'}.'_'.$id.'_'.$file.'.cf','GLOBAL',$self->{'rules'}->{$id}); foreach ( keys(%{$self->{'rules'}->{$file}->{'SCORED'}->{$_}}) ) {
$self->processWords($self->{'priority'}.'_'.$id.'_'.$file.'.cf','GLOBAL',$self->{'rules'}->{$id}); print("meta ".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_).
" ( __".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_)." )\n"
);
if (defined($self->{'rules'}->{$file}->{'COMMENTS'}->{$_})) {
print("describe ".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_)." ".
$self->{'rules'}->{$file}->{'COMMENTS'}->{$_}."\n"
);
}
print("score ".$self->{'rules'}->{$file}->{'PREFIX'}."_".uc($_)." ".
$self->{'rules'}->{$file}->{'SCORED'}->{$_}."\n\n"
);
}
}
=head2 C<$kw->writeGroups($file);>
Write group rules for file in the current file, or one set by C<$file>. These
are a list of 'meta' rules where the component rules are all of the 'meta's in
the group for each of the available match counts.
meta KW_FILE_GROUP_1 ( ( __KW_WORD1 + __KW_WORD2 ) >= 1 )
describe KW_FILE_GROUP_1 Found 1 word(s) from GROUP
score KW_FILE_GROUP_1 0.01
meta KW_FILE_GROUP_2 ( ( __KW_WORD1 + __KW_WORD2 ) >= 2 )
describe KW_FILE_GROUP_2 Found 2 word(s) from GROUP
score KW_FILE_GROUP_2 0.01
=cut
sub writeGroups
{
my $self = shift;
my $file = shift || $self->getFile();
my $prefix = $self->{'rules'}->{$file}->{'PREFIX'};
if ($self->{'singleOutfile'}) {
print(
"#############".('#'*length($prefix))."#\n".
"# Groups for ".$prefix."\n".
"#############".('#'*length($prefix))."#\n\n"
);
}
foreach my $group ( keys(%{$self->{'rules'}->{$file}}) ) {
next if ( $group eq 'COMMENT' || $group eq 'SCORED' );
print("# $group\n");
my $gprefix = $prefix;
unless ($group eq 'LOCAL') {
$gprefix .= $group;
}
my $start = "meta ".$gprefix."_";
my $words = " ( ( ";
foreach my $word ( keys(%{$self->{'rules'}->{$file}->{$group}}) ) {
$words .= "__".$prefix."_".$word." + ";
}
$words =~ s/\+ $/\) >= /;
for (my $i = 1; $i <= scalar(@{$self->{'rules'}->{$file}->{$group}}); $i++) {
my $line = $start.$i.$words.$i." )\n";
print($line."\n".
"describe ".$gprefix."_".$i." Found $i GLOBAL word(s) from ".$prefix."\n"
);
print("score ".$gprefix."_".$i. " 0.01\n\n");
} }
} }
} }
=head2 C<$kw->writeFiles($out_dir)> =head2 C<$kw->writeGlobals();>
Write 'GLOBAL' group rules. Similar to C<writeGroups> except that component
rules must be included from external files.
meta KW_1 ( ( __KW_FILE1_WORD + __KW_FILE2_WORD ) >= 1 )
describe KW_1 Found 1 GLOBAL word(s) from KW
score KW_1 0.01
meta KW_2 ( ( __KW_FILE1_WORD + __KW_FILE2_WORD ) >= 2 )
describe KW_2 Found 2 GLOBAL word(s) from KW
score KW_2 0.01
=cut
sub writeGlobals
{
my $self = shift;
my $outfile = $self->getGlobalOutfile();
print(
"##########\n".
"# Globals\n".
"##########\n\n"
);
my $prefix = $self->getId();
my $start = "meta ".$prefix."_";
my $words = " ( ( ";
foreach my $word ( keys(%{$self->{'rules'}->{'GLOBAL'}}) ) {
$words .= "__".$self->{'rules'}->{'GLOBAL'}->{$word}."_".$word." + ";
}
$words =~ s/\+ $/\) >= /;
for (my $i = 1; $i <= scalar(@{$self->{'rules'}->{'GLOBAL'}}); $i++) {
my $line = $start.$i.$words.$i." )\n";
print($line."\n".
"describe ".$prefix."_".$i." Found $i GLOBAL word(s) from ".$prefix."\n"
);
print("score ".$prefix."_".$i. " 0.01\n\n");
}
}
sub writeScores
{
my $self = shift;
my $path = shift;
}
sub getDir
{
my $self = shift;
my $dir = $self->{'dir'} || $self->setDir();
return $dir;
}
sub setDir
{
my $self = shift;
my $dir = shift || ("$ENV{'PWD'}/$self->{'id'}");
$self->{'dir'} = $dir;
}
sub createDir
{
my $self = shift;
my $dir = shift || $self->getDir();
unless (-d $dir) {
mkdir($dir) || return "Failed to mkdir '$dir'";
}
return undef;
}
sub cleanDir
{
my $self = shift;
my @files = @_ || (keys(%{$self->{'filemap'}}));
foreach my $file (@files) {
if (-e $self->{'filemap'}->{$file}) {
if ($self->{'debug'}) {
print STDERR "Removing old file ".$self->{'filemap'}->{$file}."\n";
print "Removing old file ".$self->{'filemap'}->{$file}."\n";
#unlink($self->{'dir'}.'/'.$self->{'filemap'}->{'file'}) || die "Output file '".$self->{'filemap'}->{$file}."' already exists and could not be deleted\n";
}
}
}
}
=head2 C<$kw->writeAll($dir)>
Output files will use the name of each input file, stripping any extension, and Output files will use the name of each input file, stripping any extension, and
forcing the name to uppercase. Rules in each file will be called: forcing the name to uppercase. Rules in each file will be called:
@ -824,22 +1060,26 @@ body for that word.
=cut =cut
sub writeFiles sub writeAll
{ {
my $self = shift; my $self = shift;
foreach my $out (keys(%{$self->{'rules'}})) { my @written;
unless ($self->{'joinScores'}) {
$file =~ s/(.*)\.cf/$1_SCORES.cf/;
print STDERR $self->{'scores'};
}
if (open(my $fh, '>', $out)) {
print $fh $self->{'output'}->{$out};
close($fh);
} else {
print STDERR "Failed to write $out\n";
}
}
$self->cleanDir() if (-d $self->getDir());
$self->createDir() unless (-d $self->getDir());
foreach my $file (keys(%{$self->{'rules'}})) {
# Reserve GLOBAL for last
if ($file eq 'GLOBAL') {
next;
}
$self->setFile($file);
$self->writeMetas();
$self->writeWords();
$self->writeGroups();
$self->writeScores();
}
$self->writeGlobals();
return 0; return 0;
} }