#!/usr/bin/perl

# ---------------- CRM114 email feedback program --------------------
#
# The mailfilter.crm program requires feedback when it
# incorrectly classifies an email. The "normal" way is to forward
# the email to yourself with embedded re-classification commands, but
# this isn't very easy to use for non-technophiles.
#
# This program makes the process easier, by scanning a Maildir for
# messages to re-classify. The user drags mis-classified email into this
# folder, and this program is run by a cron entry periodically.
#
# This program reads the header of the mis-classified email to determine
# how it was originally classified, and then issues commands to force
# the filter to re-learn the text.
#
# Revision: 1.05
# Author: Michael J. Chudobiak (mjc@avtechpulse.com)
#
# Feel free to use/modify this program any way you like.
#
# Revisions
# -------------------------------------------------------------------
# 1.0:  Initial version.
# 1.01: Remove all X-CRM114 headers, not just X-CRM114-Status.
# 1.02: Just scan headers for X-CRM114 messages, not the body.
#       Also, throw an error if the number of X-CRM114-Status headers
#       is not exactly one.
# 1.03: Added option to delete messages if the number of
#       X-CRM114-Status headers is not exactly one.
# 1.04: missing period typo: s/^X-CRM114-*// corrected to s/^X-CRM114-.*//
# 1.05: add option to mark messages with improper CRM114 headers as spam
# -------------------------------------------------------------------

use strict;      # to catch stupid errors
use warnings;

use File::Copy qw(copy);   # to make a scratch copy of each message
use File::Temp ();         # to create the scratch file safely
use POSIX ();              # for _exit() in forked children
use Tie::File;             # to edit a file in-place.

# ---------- Configuration ------------------------------------------

# set verbosity
my $debug_messages = 1;

# The base directory is normally "/home"
my $basedir = "/home";

# This is the sub-directory of /home/user that contains the mailfilter.cf,
# spam.css and nonspam.css files
my $crmdir = "crm114_workingdir";

# Location of procmail
my $procmail = "/usr/bin/procmail";

# This is the sub-directory of /home/user where mail is delivered
my $Maildir = "Maildir";

# This is the filesystem location of the folder to scan.
# The default value is "INBOX.checkspam.re-classify/cur".
# Personally, I have procmail drop spam into INBOX.checkspam,
# and I set up a "re-classify" sub-folder.
my $IMAPfolderdir = "INBOX.checkspam.re-classify/cur";

# What should we do with email that is not marked clearly as spam
# or non-spam? (This shouldn't happen, but it can if several users
# share a common inbox, and one or more users move a message to the
# re-classification folder before all of the email clients
# are updated. The first message-move will be successful,
# but later ones might just create a blank file.)
# It can also happen (it seems) if an incoming spam
# message is really strangely formatted - then some versions of
# CRM114 seem to have trouble adding X-CRM headers.
# The safe thing to do is to ignore it, and return an error.
# The practical thing to do is to treat it as spam, because
# the strangely formatted messages almost certainly are spam,
# and blank messages won't affect learning.
my $mark_unclassified_message_as_spam = 1;

# If you are worried about incorrect learning that might be
# introduced by marking unclassified messages as spam, then set the
# above variable to 0. To simply delete unclassified messages,
# set the next variable to 1. (If you don't delete the message,
# you will get an error warning every time this script runs, until
# you deal with the message file by removing it manually.)
my $delete_unclassified_messages = 1;

# The above two variables are ugly hacks, really, that wouldn't
# be necessary in a perfect world.

# ---------- End of Configuration -----------------------------------

# Run a command with its standard input connected to $input_file and its
# standard output discarded. The command is executed directly from the
# argument list (no shell), so user names and Maildir file names can never
# be interpreted as shell syntax.
#
# Returns the wait status of the command (0 means success), or -1 if the
# child process could not be created.
sub run_with_stdin {
    my ($input_file, @command) = @_;

    my $pid = fork();
    if (!defined $pid) {
        warn "Cannot fork to run $command[0]: $!\n";
        return -1;
    }

    if ($pid == 0) {
        # Child process. Always leave through POSIX::_exit so that the
        # parent's buffered output and cleanup handlers are not run twice.
        open(STDIN, '<', $input_file)  or POSIX::_exit(126);
        open(STDOUT, '>', '/dev/null') or POSIX::_exit(126);
        {
            no warnings 'exec';    # the failure is reported just below
            exec { $command[0] } @command;
        }
        warn "Cannot exec $command[0]: $!\n";
        POSIX::_exit(127);
    }

    waitpid($pid, 0);
    return $?;
}

# Count the X-CRM114-Status headers in the message file $path and remove
# every X-CRM114-* header (including folded continuation lines) from it,
# to avoid confusing the re-learning software. Only the header section is
# examined; scanning stops at the first blank line.
#
# The header lines are removed outright rather than blanked, because an
# empty line inside the header section would mark the end of the headers
# and turn every following header into message body.
#
# Returns (spam_count, good_count), or an empty list if the file could
# not be opened.
sub strip_crm114_headers {
    my ($path) = @_;

    # Tie the file to an array. Changes to the array change the file.
    my @lines;
    if (!tie(@lines, 'Tie::File', $path)) {
        warn "Cannot open $path: $!\n";
        return;
    }

    my $spam_count    = 0;
    my $good_count    = 0;
    my $in_crm_header = 0;    # true while inside a (possibly folded) X-CRM114 header
    my @doomed;               # indexes of the records to remove

    if ($debug_messages) { print "Reading headers "; }

    for (my $i = 0; ; $i++) {
        my $line = $lines[$i];
        last if !defined $line;    # end of file

        # A blank line indicates the end of the headers.
        last if $line =~ /^$/;

        if ($debug_messages) { print "."; }

        if ($line =~ /^X-CRM114-/) {
            # Is there a header declaring the message to be spam?
            $spam_count++ if $line =~ /^X-CRM114-Status: SPAM/;

            # Is there a header declaring the message to be non-spam?
            $good_count++ if $line =~ /^X-CRM114-Status: Good/;

            $in_crm_header = 1;
            push @doomed, $i;
        }
        elsif ($in_crm_header && $line =~ /^[ \t]/) {
            # Continuation of the X-CRM114 header that is being removed.
            push @doomed, $i;
        }
        else {
            $in_crm_header = 0;
        }
    }

    if ($debug_messages) { print "\n"; }

    # Remove from the bottom up so the remaining indexes stay valid.
    for my $index (reverse @doomed) {
        splice(@lines, $index, 1);
    }

    # Close the message file.
    untie @lines;

    return ($spam_count, $good_count);
}

# Delete a message file from the re-classify folder, reporting any failure.
sub remove_message {
    my ($path) = @_;

    unlink($path) or warn "Cannot delete $path: $!\n";
    return;
}

# Re-classify one message file.
#
#   $user          - name of the account that owns the message
#   $userhome      - home directory of that account
#   $full_filename - full path of the message in the re-classify folder
#
# All work is done on a scratch copy with the X-CRM114 headers removed.
# The original file is left untouched until it is deleted, so that a
# message whose re-delivery failed is seen with its original
# classification again on the next run.
sub process_message {
    my ($user, $userhome, $full_filename) = @_;

    if (!-f $full_filename) {
        warn "Skipping $full_filename: not a regular file.\n";
        return;
    }

    # The scratch file is removed automatically when $scratch goes out
    # of scope.
    my $scratch = eval {
        File::Temp->new(TEMPLATE => 'crm114-reclassify-XXXXXX', TMPDIR => 1);
    };
    if (!$scratch) {
        warn "Cannot create a temporary file for $full_filename: $@";
        return;
    }
    my $clean_copy = $scratch->filename;
    close($scratch);

    if (!copy($full_filename, $clean_copy)) {
        warn "Cannot copy $full_filename to $clean_copy: $!\n";
        return;
    }

    # Determine the original spam/nonspam classification
    my ($currently_classified_as_spam, $currently_classified_as_good)
        = strip_crm114_headers($clean_copy);
    return if !defined $currently_classified_as_spam;

    my $learner = "$userhome/$crmdir/mailfilter.crm";

    if (($currently_classified_as_spam == 1) && ($currently_classified_as_good == 0)) {
        # incorrectly marked as spam
        if ($debug_messages) { print "Change classification of $full_filename to non-spam.\n"; }

        # Learn as non-spam. Learning is best-effort: a failure is
        # reported, but the message is still re-delivered.
        my $status = run_with_stdin($clean_copy, $learner, '--learnnonspam');
        if ($status != 0) {
            warn "$learner --learnnonspam failed for $full_filename (wait status $status).\n";
        }

        # re-deliver message using procmail
        $status = run_with_stdin($clean_copy,
            'sudo', '-u', $user, $procmail, "$userhome/.procmailrc");
        if ($status != 0) {
            # Never delete mail that was not delivered.
            warn "Re-delivery of $full_filename failed (wait status $status); "
               . "keeping the file.\n";
            return;
        }

        # delete the file in the re-classify directory
        remove_message($full_filename);
    }
    elsif ((($currently_classified_as_good == 1) && ($currently_classified_as_spam == 0))
        || ($mark_unclassified_message_as_spam == 1)) {
        # incorrectly marked as non-spam, or not marked at all
        if ($debug_messages) { print "Change classification of $full_filename to SPAM.\n"; }

        # learn as spam
        my $status = run_with_stdin($clean_copy, $learner, '--learnspam');
        if ($status != 0) {
            warn "$learner --learnspam failed for $full_filename (wait status $status).\n";
        }

        # delete the file in the re-classify directory
        remove_message($full_filename);
    }
    else {
        # unexpected condition!
        if ($debug_messages) {
            print "Not sure how to handle $full_filename. ";
            print "Too few or too many X-CRM114-Status headers found.\n";
            if ($delete_unclassified_messages) {
                print "Deleting file.\n";
            }
            else {
                print "Skipping file.\n";
            }
        }

        if ($delete_unclassified_messages) {
            # delete the file in the re-classify directory
            remove_message($full_filename);
        }
    }

    return;
}

# Process every message in the re-classify folder of one user, if that
# user has such a folder.
sub process_user {
    my ($user) = @_;

    my $userhome = "$basedir/$user";
    my $full     = "$userhome/$Maildir/$IMAPfolderdir";

    # See if the re-classify folder exists. If so, proceed.
    return if !-d $full;

    # get a list of message files in the re-classify folder,
    # ignoring entries starting with a dot
    my $folder_handle;
    if (!opendir($folder_handle, $full)) {
        warn "Cannot read $full: $!\n";
        return;
    }
    my @filelist = grep { !/^\./ } readdir($folder_handle);
    closedir($folder_handle);

    return if !@filelist;

    # change to CRM114 working directory
    if (!chdir("$userhome/$crmdir")) {
        warn "Cannot change to $userhome/$crmdir: $!\n";
        return;
    }

    # process each file
    for my $filename (@filelist) {
        process_message($user, $userhome, "$full/$filename");
    }

    return;
}

# Read in all the user directories in the base directory,
# e.g., /home/alice, /home/bob, /home/charles, and process each of them.
sub main {
    my $base_handle;
    if (!opendir($base_handle, $basedir)) {
        warn "Cannot read $basedir: $!\n";
        return;
    }
    my @entries = readdir($base_handle);
    closedir($base_handle);

    for my $user (@entries) {
        # ignore directories starting with a dot
        next if $user =~ /^\./;

        process_user($user);
    }

    return;
}

main();