#!/usr/bin/perl
#
# mb2md-2-01.pl      Converts Mbox mailboxes to Maildir format.
#
# Public domain.
#
# Robin Whittle    12 March 2002
#
# This is version 2.01 of the original mb2md script of 15 July 2001.
#
# Reads a directory full of Mbox format mailboxes and creates a set of
# Maildir format mailboxes.  Some details of this are to suit Courier
# IMAP's naming conventions for Maildir mailboxes.
#
#    http://www.inter7.com/courierimap/
#
# This is intended to automate the conversion of the old
# /var/spool/mail/blah file - with one call of this script - and to
# convert one or more mailboxes in a specified directory with separate
# calls with other command line arguments.
#
# Run this as the user - in these examples "blah".
#
# This script's web abode is http://www.firstpr.com.au/web-mail/ .
#
# I knew nothing of Perl before I wrote this.  I used the man and FAQ pages
# at http://www.perldoc.com and a chapter preview from http://www.cgi-perl.com.
#
# The Mbox -> Maildir inner loop is based on qmail's script mbox2maildir, which
# was kludged by Ivan Kohler in 1997 from convertandcreate (public domain)
# by Russel Nelson.  Both these convert a single mailspool file.
#
# The qmail distribution has a maildir2mbox.c program.
#
# Original version tested on RedHat 7.1  Perl 5.6.0
# Version 2.01     tested on RedHat 7.2  Perl 5.6.0
#
#
#
# --------------------------------------------------------------------
#
# Version 2.01 requires there be the GNU touch program at /bin/touch
#
# If it is somewhere else, then alter this:

# Path of the touch program used to set each converted message file's
# modification time to the message's receive date.  This must be a real
# statement on its own line: it is read later when the message files are
# date-stamped.
$touchcmd = "/bin/touch";

# Version 2.01 supports conversion of:
#
#    Date    The date-time in the "From " line of the message in the
#            Mbox format is the date when the message was *received*.
#            This is transformed into the date-time of the file which
#            contains the message in the Maildir mailbox.
#
#            This relies on the system having a version of "touch"
#            such as the GNU fileutils touch which can handle
#            the date format found in the "From " line.
#
#            The script tries to cope with errant forms of the
#            Mbox "From " line which it may encounter, but if
#            there is something really screwy in a From line,
#            then perhaps the script will fail when "touch"
#            is given an invalid date.  Please report the
#            exact nature of any such "From " line!
#
#
#    Flagged
#    Replied
#    Read = Seen
#    Tagged for Deletion
#
#            In the Mbox message, flags for these are found in the
#            "Status: N" or "X-Status: N" headers, where "N" is 0
#            or more of the following characters in the left column.
#
#            They are converted to characters in the right column,
#            which become the last characters of the file name,
#            following the ":2," which indicates IMAP message status.
#
#
#                 F -> F    Flagged
#                 A -> R    Replied
#                 R -> S    Read = Seen
#                 D -> T    Tagged for Deletion (Trash)
#
#            This is based on the work of Philip Mak who wrote a
#            completely separate Mbox -> Maildir converter called
#            perfect_maildir and posted it to the Mutt-users mailing
#            list on 25 December 2001:
#
#            http://www.mail-archive.com/mutt-users@mutt.org/msg21872.html
#
#            Michael Best originally integrated those changes into mb2md.
#
# In addition, the names of the message files in the Maildir are of a
# regular length and are of the form:
#
#    7654321.000123.mbox:2,xxx
#
# Where "7654321" is the Unix time in seconds when the script was
# run and "000123" is the six-digit, zero-padded message number as
# messages are converted from the Mbox file.  "xxx" represents zero or
# more of the above flags F, R, S or T.
#
#
# ---------------------------------------------------------------------
#
#
# USAGE
# =====
#
# Run this as the user of the mailboxes, not as root.
#
#
# mb2md MBROOT MBDIR [DEST]
#
#
#    MBROOT  Directory, relative to the user's home directory,
#            which is where the MBDIR directory is located.
#
#
#    MBDIR   Directory, relative to MBROOT where the Mbox files
#            are.
#            There are two special cases:
#
#               1 - "None"
#
#               2 - "Inbox"
#
#            If it is set to "None" then mailboxes in the MBROOT
#            directory will be converted and placed in the
#            DEST directory.  (Typically the Inbox directory
#            which in this instance is also functioning as a
#            folder for other mailboxes.)
#
#            If this is set to "Inbox" then the source will
#            be the single mailbox at /var/spool/mail/blah for
#            user blah and the destination mailbox will be the
#            DEST mailbox itself.
#
#            Except in this "Inbox" case, the MBDIR directory
#            name will be encoded into the new mailboxes' names.
#            See the examples below.
#
#            This script will not work with mailbox files which
#            contain spaces in their names.
#
#            Expect trouble if any files in the MBDIR directory
#            are not proper Mbox mailbox files.
#
#            This does not save a UW IMAP dummy message file
#            at the start of the Mbox file.  Small changes
#            in the code could adapt it for looking for
#            other distinctive patterns of dummy messages too.
#
#            Don't let the source directory you give as MBDIR
#            contain any "."s in its name, unless you want to
#            create subfolders from the IMAP user's point of
#            view.  See the example below.
#
#
#    DEST    Directory relative to user's home directory where the
#            Maildir format directories will be created.
#            If not given, then the destination will be ~/Maildir .
#            Typically, this is what the IMAP server sees as the
#            Inbox and the folder for all user mailboxes.
#
#
#
# Example
# =======
#
# We have a bunch of directories of Mbox mailboxes located at
# /home/blah/oldmail/
#
#    /home/blah/oldmail/fffff
#    /home/blah/oldmail/ggggg
#    /home/blah/oldmail/xxx/aaaa
#    /home/blah/oldmail/xxx/bbbb
#    /home/blah/oldmail/xxx/cccc
#    /home/blah/oldmail/xxx/dddd
#    /home/blah/oldmail/yyyy/huey
#    /home/blah/oldmail/yyyy/duey
#    /home/blah/oldmail/yyyy/louie
#
# With the UW IMAP server, fffff and ggggg would have appeared in the root
# of this mail server, along with the Inbox.
# aaaa, bbbb etc, would have
# appeared in a folder called xxx from that root, and xxx was just a folder
# not a mailbox for storing messages.
#
# We also have the mailspool Inbox at:
#
#    /var/spool/mail/blah
#
#
# To convert these, as user blah, we give the first command:
#
#    mb2md xyz Inbox
#
# In this case, the first argument is irrelevant - "xyz" is ignored.
#
# The main Maildir directory will be created if it does not exist.
# (This is true of any argument options, not just MBDIR = "Inbox".)
#
#    /home/blah/Maildir/
#
# It has the following subdirectories:
#
#    /home/blah/Maildir/tmp/
#    /home/blah/Maildir/new/
#    /home/blah/Maildir/cur/
#
# Then the /var/spool/mail/blah file is read, split into individual files
# and written into /home/blah/Maildir/new/ .
#
# Now we give the second command:
#
#    mb2md oldmail None
#
# This reads the fffff and ggggg Mbox mailboxes and creates:
#
#    /home/blah/Maildir/.fffff/
#    /home/blah/Maildir/.ggggg/
#
# Now we give the third command:
#
#    mb2md oldmail xxx
#
# Then all the mailboxes:
#
#    /home/blah/oldmail/xxx/aaaa
#    /home/blah/oldmail/xxx/bbbb
#    /home/blah/oldmail/xxx/cccc
#    /home/blah/oldmail/xxx/dddd
#
# are converted into Maildir format mailboxes in the following
# directories:
#
#    /home/blah/Maildir/.xxx.aaaa/
#    /home/blah/Maildir/.xxx.bbbb/
#    /home/blah/Maildir/.xxx.cccc/
#    /home/blah/Maildir/.xxx.dddd/
#
# This suits Courier IMAP fine, and these will appear to the IMAP
# client as four mailboxes in the folder "xxx" within the Inbox
# folder.
#
# The final command:
#
#    mb2md oldmail yyyy
#
# does the rest.
# The result, from the IMAP client's point of view is:
#
#    Inbox -----------------
#        |
#        | fffff -----------
#        | ggggg -----------
#        |
#        - xxx
#        |   | aaaa --------
#        |   | bbbb --------
#        |   | cccc --------
#        |   | dddd --------
#        |
#        - yyyy
#            | huey -------
#            | duey -------
#            | louie ------
#
# Note that although ~/Maildir/.xxx/ and ~/Maildir/.yyyy may appear
# as folders to the IMAP client the above commands do not generate
# any Maildir folders of these names.  These are simply elements
# of the names of other Maildir directories.
#
# With a separate run of this script, using the MBDIR = "None"
# approach, it would be possible to create mailboxes which
# appear at the same location as far as the IMAP client is
# concerned.  By having Mbox mailboxes in some directory:
# ~/oldmail/nn/ of the form:
#
#    /home/blah/oldmail/nn/xxx
#    /home/blah/oldmail/nn/yyyy
#
# then the command:
#
#    mb2md oldmail/nn None
#
# will create two new Maildirs:
#
#    /home/blah/Maildir/.xxx/
#    /home/blah/Maildir/.yyyy/
#
# Then what used to be the xxx and yyyy folders now function as
# mailboxes too.  Netscape 4.77 needed to be put to sleep and given ECT
# to recognise this - deleting the contents of (Win2k example):
#
#    C:\Program Files\Netscape\Users\uu\ImapMail\aaa.bbb.ccc\
#
# where "uu" is the user and "aaa.bbb.ccc" is the IMAP server
#
# I often find that deleting all this directory's contents, except
# "rules.dat", forces Netscape back to reality after its IMAP innards
# have become twisted.  Then maybe use File > Subscribe - but this
# seems incapable of subscribing to folders.
#
# For Outlook Express, select the mail server, then click the
# "IMAP Folders" button and use "Reset list".  In the "All"
# window, select the mailboxes you want to see in normal
# usage.
#
#
# This script does not recurse subdirectories or delete old mailboxes.
#
# Be sure not to be accessing the Mbox mailboxes while running this
# script.  It does not attempt to lock them.
# Likewise, don't run two copies of this script either.
#
#
# Trickier usage . . .
# ====================
#
# If you have a bunch of mailboxes in a directory ~/oldmail/doors/
# and you want them to appear in folders such as:
#
#    ~/Maildir/.music.bands.doors.Jim
#    ~/Maildir/.music.bands.doors.John
#
# etc. so they appear in an IMAP folder:
#
#    Inbox -----------------
#        | music
#            | bands
#                | doors
#                    | Jim
#                    | John
#                    | Robbie
#                    | Ray
#
# Then you should rename the source directory to:
#
#    ~/oldmail/music.bands.doors/
#
# then you can use:
#
#    mb2md oldmail music.bands.doors
#
#------------------------------------------------------------------------------

# Look up the invoking user (real UID, $<) in the password database.
# $name, $uid and $dir (the home directory) are used below and by the
# rest of the script; the remaining fields are assigned only so that the
# existing global names keep their values.
($name, $passwd, $uid, $gid, $quota, $comment, $gcos, $dir, $shell)
    = getpwuid($<);

-e $dir or die("Fatal: home dir $dir doesn't exist.\n");

# Owner of the home directory.  The built-in stat() is used instead of the
# Perl 4 library stat.pl (and its &Stat / $st_uid interface), which current
# Perl distributions no longer ship.  Element 4 of stat()'s list is the
# numeric UID of the owner.
$st_uid = (stat($dir))[4];
defined $st_uid or die("Fatal: unable to stat home dir $dir: $!\n");

if ($uid != $st_uid) {
    die("Fatal: $name is $uid, but $dir is owned by $st_uid.\n");
}

# Work from the home directory.  Later paths are built as absolute names
# from $dir, so this is mostly a safeguard - but keep it.
chdir($dir) or die "Fatal: unable to chdir to $dir.\n";

# Get arguments and determine source and target directories.
#
# Exactly 2 or 3 arguments are required (MBROOT MBDIR [DEST]).  Count the
# arguments rather than testing their truth, so that a directory literally
# named "0" is accepted and any surplus argument is rejected.
if (@ARGV < 2 || @ARGV > 3 || $ARGV[1] eq '') {
    die("There should be 2 or 3 args. Read the script for full doco!\n");
}

# MBROOT, eg. "oldmail".  Whether this directory exists is checked later,
# because MBROOT is ignored when MBDIR is "Inbox".
$mbroot = $ARGV[0];

# MBDIR, eg. "xxx".  Its existence is checked after the special values
# "None" and "Inbox" have been handled.
$mbdir = $ARGV[1];

# Handle the third argument, if any.
# DEST defaults to "Maildir" when the third argument is absent or empty.
$dest = (defined $ARGV[2] && $ARGV[2] ne '') ? $ARGV[2] : "Maildir";

# print rather than printf throughout: these strings contain user-supplied
# directory names, which must never be interpreted as a printf format
# (a "%" in a name would garble the output).
print "MBROOT: $mbroot MBDIR: $mbdir DEST: $dest \n";


# Handle the arguments
#---------------------
#
# First set $target - the Maildir into which the mails or mailboxes will
# be written, eg. /home/blah/Maildir - and make sure it exists with its
# three standard subdirectories, creating whatever is missing.
$target = "$dir/$dest";

-d $target or mkdir($target, 0700)
    or die("Fatal: Directory $target doesn't exist and can't be created.\n");

for my $subdir (qw(tmp new cur)) {
    -d "$target/$subdir" or mkdir("$target/$subdir", 0700)
        or die("Fatal: Unable to make $target/$subdir/ subdirectory.\n");
}

# Except when MBDIR is "Inbox" (where MBROOT is ignored), check that MBROOT
# exists and is a directory.  The first test is -e so that the two distinct
# error messages can both be reached.
if ($mbdir ne "Inbox") {
    -e "$dir/$mbroot"
        or die("Fatal: MBROOT directory $dir/$mbroot/ does not exist.\n");
    -d "$dir/$mbroot"
        or die("Fatal: MBROOT $dir/$mbroot is not a directory.\n");
}

# Depending on the three possible modes of MBDIR, work out:
#
#   $sourcedir - the directory holding the Mbox files (no trailing "/"):
#
#                   /home/blah/oldmail/xxx    usual case
#                   /home/blah/oldmail        MBDIR is "None"
#                   /var/spool/mail           MBDIR is "Inbox"
#
#   $tfile     - the prefix of each new mailbox's directory name inside
#                $target; the Mbox file's own name is appended to it later.
if ($mbdir eq "None") {
    # MBROOT itself holds the mailboxes; it was checked above.
    $sourcedir = "$dir/$mbroot";

    # No directory component in the new names, eg. ".fffff/".
    $tfile = ".";
}
elsif ($mbdir eq "Inbox") {
    $sourcedir = "/var/spool/mail";

    # Check the mailspool directory exists and that this user's mailspool
    # is present in it and is a file, not a directory.
    -e "$sourcedir"
        or die("Fatal: MBDIR = Inbox but directory $sourcedir/ does not exist.\n");
    -d "$sourcedir"
        or die("Fatal: MBDIR = Inbox but $sourcedir is not a directory.\n");
    -e "$sourcedir/$name"
        or die("Fatal: MBDIR = Inbox but $sourcedir/$name mailspool file does not exist.\n");
    if (-d "$sourcedir/$name") {
        die("Fatal: MBDIR = Inbox but $sourcedir/$name is a directory.\n");
    }

    # Messages go straight into $target, so there is no name prefix.
    $tfile = "";
}
else {
    # The usual case, for instance /home/blah/oldmail/xxx
    $sourcedir = "$dir/$mbroot/$mbdir";

    -e "$sourcedir"
        or die("Fatal: MBDIR directory $sourcedir/ does not exist.\n");
    -d "$sourcedir"
        or die("Fatal: MBDIR $sourcedir is not a directory.\n");

    # The MBDIR name becomes the leading component of each new mailbox
    # name, eg. ".xxx." + "aaaa" gives ".xxx.aaaa/".
    $tfile = ".$mbdir.";
}

# Report what is about to happen.
print "\nSource directory: $sourcedir/ \n";

if ($mbdir eq "Inbox") {
    print "Target mailbox: $target/ \n\n\n";
}
else {
    print "Target mailboxes: $target/$tfile??????? \n\n\n";
}

# Outer Loop
#-----------
#
# Now we have a loop which looks for any file in the $sourcedir directory
# and turns it into a Maildir mailbox.  We assume all files are Mbox
# mailboxes.
#
# In one case, when $mbdir is "Inbox", the loop must work only once, to
# find the user's mailspool file.
# readdir() returns one entry name per call from a directory opened with
# opendir().  Entries that are directories (there will always be "." and
# ".." at least) are skipped inside the loop; every other entry is assumed
# to be an Mbox mailbox.
opendir(my $sdir, $sourcedir)
    or die("Fatal: Cannot open source directory $sourcedir/ \n");

# Program used to date-stamp message files.  Falls back to the conventional
# location if $touchcmd has not been set.
my $touch = (defined $touchcmd && $touchcmd ne '') ? $touchcmd : "/bin/touch";

# Number of files (assumed to be mailboxes) converted so far.
my $mailboxcount = 0;

# The explicit defined() stops a file literally named "0" from ending
# the loop early.
while (defined(my $sfile = readdir($sdir))) {

    # Skip directories, and in "Inbox" mode skip everything except the
    # user's own mailspool file.
    next if -d "$sourcedir/$sfile";
    next if $mbdir eq "Inbox" && $sfile ne $name;

    print "Found $sourcedir/$sfile\n";
    $mailboxcount++;

    # Work out the Maildir to write into:
    #
    #   MBDIR "Inbox"  ->  $target itself, which already exists, eg.
    #                      /home/blah/Maildir/new/
    #   MBDIR "None"   ->  /home/blah/Maildir/.fffff/new/
    #   anything else  ->  /home/blah/Maildir/.xxx.aaaa/new/
    my $targetmb;
    if ($mbdir eq "Inbox") {
        $targetmb = "$target";
        print "Target Maildir is $targetmb \n\n";
    }
    else {
        $targetmb = "$target/$tfile$sfile";
        print "Target Maildir is $targetmb \n";

        -d $targetmb or mkdir($targetmb, 0700)
            or die("Fatal: Directory $targetmb doesn't exist and can't be created.\n");

        for my $subdir (qw(tmp new cur)) {
            -d "$targetmb/$subdir" or mkdir("$targetmb/$subdir", 0700)
                or die("Fatal: Unable to make $targetmb/$subdir/ subdirectory.\n");
        }
    }

    # Message files are named relative to the mailbox directory ("new/..."),
    # so failing to get there must be fatal.
    chdir($targetmb) or die("Fatal: unable to chdir to $targetmb.\n");

    # Inner loop
    #-----------
    #
    # Converts one Mbox file into individual message files in the Maildir.
    # Adapted from qmail's mbox2maildir.
    #
    # Every message in an Mbox starts with a line beginning "From ", giving
    # the envelope sender and the date the message was received.  Body lines
    # that would otherwise begin with "From " are stored as ">From "; the
    # quote is removed again here.  Note this is lossy for a message whose
    # body originally contained ">From " at the start of a line.
    #
    # Each message is written to a file in new/ named:
    #
    #    nnnnnnnnn.cccccc.mbox:2,XXXX
    #
    # where "nnnnnnnnn" is the Unix time at which conversion of this mailbox
    # started, "cccccc" is the zero-padded message number within the Mbox,
    # ".mbox" marks the file as converted from an Mbox, and "XXXX" is zero
    # or more of the IMAP flag letters F, R, S, T.  This is sort-of compliant
    # with the Maildir naming conventions at
    #
    #    http://www.qmail.org/man/man5/maildir.html
    #
    # (no process ID or hostname is involved, but it is probably good
    # enough).  The IMAP server is expected to move the files to cur/ and
    # rename them as it pleases.
    #
    # The UW IMAPD dummy message ("DON'T DELETE THIS MESSAGE -- FOLDER
    # INTERNAL DATA"), when it is the first message of the Mbox, is not
    # saved.
    #
    # The three-argument open keeps odd characters in a file name from
    # being treated as an open mode or a pipe.
    open(my $mbox, '<', "$sourcedir/$sfile")
        or die("Fatal: unable to open input mailbox file: $sourcedir/$sfile ! \n");
    print "Converting Mbox $sourcedir/$sfile . . . \n";

    my $messagecount = 0;        # messages seen so far in this Mbox
    my $unique       = time;     # first component of every file name
    my $inheaders    = 0;        # 1 while reading a message's header lines
    my $headers      = '';       # header lines collected for this message
    my $flags        = '';       # letters from Status: / X-Status: headers
    my $subject      = '';       # Subject of the current message
    my $messagefn    = '';       # file name of the current message
    my $receivedate  = 'Bogus';  # date text taken from the "From " line
    my $dummyfound   = 0;        # 1 if the first message was the dummy
    my $out;                     # output handle; undef when no file is open

    # readline() is the function form of the angle-bracket read operator.
    while (defined(my $line = readline($mbox))) {

        if ($line =~ /^From /) {
            # Start of a new message: finish the previous one, if any,
            # using the date that belonged to it.
            if ($out) {
                finish_message($out, $messagefn, $receivedate, $touch);
                undef $out;
            }

            # Start from a clean slate so nothing leaks in from an earlier
            # message.  Header lines of a message that never reached its
            # terminating blank line are discarded here.
            $inheaders   = 1;
            $headers     = '';
            $flags       = '';
            $subject     = '';
            $receivedate = mbox_from_date($line);
            next;
        }

        if ($inheaders) {
            if ($line ne "\n") {
                # An ordinary header line: keep it and pick out the bits
                # we need.
                $headers .= $line;
                $flags   .= $1 if $line =~ /^Status: ([A-Z]+)/;
                $flags   .= $1 if $line =~ /^X-Status: ([A-Z]+)/;
                $subject  = $1 if $line =~ /^Subject: (.*)$/;
                next;
            }

            # Blank line: end of the headers.  Build the file name; the
            # %06d padding makes the files sort in message order.
            $inheaders = 0;
            $messagefn = sprintf("new/%d.%06d.mbox:2,", $unique, $messagecount)
                       . maildir_flag_suffix($flags);
            $messagecount++;

            if ($messagecount == 1 && is_dummy_subject($subject)) {
                # The mail system's dummy first message: create no file.
                # With no output handle open its body is skipped below.
                $dummyfound = 1;
            }
            else {
                open($out, '>', $messagefn)
                    or die("Fatal: unable to create new message");
                print {$out} $headers
                    or die("Fatal: unable to write to new message");
            }

            $headers = '';
            $flags   = '';

            # Fall through: the blank separator line itself is written as
            # the first line after the headers.
        }

        # Body line.  Ignore it when no message file is open - that is,
        # text before the first "From " line, or the dummy message's body.
        next unless $out;

        # Undo the Mbox quoting of body lines starting with "From ".
        $line =~ s/^>From /From /;

        print {$out} $line
            or die("Fatal: unable to write to new message");
    }

    if ($dummyfound) {
        print "\n\nDummy mail system first message detected and not saved.\n";
        $messagecount--;
    }

    close($mbox);

    # Finish the last message of the Mbox, if one is still open.
    if ($out) {
        finish_message($out, $messagefn, $receivedate, $touch);
        undef $out;
    }

    print " $messagecount messages.\n\n";
    print "$mailboxcount files processed.\n";
}

closedir($sdir);

exit 0;


# Helpers
#--------

# mbox_from_date(LINE)
#
# Returns the date-time text of an Mbox "From " line, eg. for
#
#    From dduck@test.org Wed Nov 24 11:05:35 1999
#
# it returns "Wed Nov 24 11:05:35 1999".  The address may contain spaces
# inside quotes, eg. "bounce-News 5-rw=test.org"@announce2.mp3.com, so when
# there is an "@" everything up to and including the first one is dropped
# before the rest of the address word is removed.  With no "@" the first
# word is taken to be a bare account name.
sub mbox_from_date {
    my ($fromline) = @_;

    $fromline =~ s/^From //;

    if ($fromline =~ m/@/) {
        $fromline =~ s/[^@]+//;     # everything before the first "@"
        $fromline =~ s/@//;         # the "@" itself
    }

    $fromline =~ s/(\S)+ //;        # remainder of the address, plus a space
    $fromline =~ s/\s+\z//;         # trailing newline

    return $fromline;
}

# maildir_flag_suffix(FLAGS)
#
# Maps the letters collected from Status: and X-Status: headers to the
# Maildir flag letters appended after ":2," in the file name.
sub maildir_flag_suffix {
    my ($status) = @_;
    my $suffix = '';

    $suffix .= 'F' if $status =~ /F/;    # Flagged.
    $suffix .= 'R' if $status =~ /A/;    # Replied to.
    $suffix .= 'S' if $status =~ /R/;    # Seen or Read.
    $suffix .= 'T' if $status =~ /D/;    # Tagged for deletion.

    return $suffix;
}

# is_dummy_subject(SUBJECT)
#
# True if SUBJECT marks a mail system's internal dummy message.  Add search
# terms as required.  The last 2 patterns are for rent.
sub is_dummy_subject {
    my ($text) = @_;

    return 1 if $text =~ m/^DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA/;
    return 1 if $text =~ m/^Bulwinkle next Pope/;
    return 1 if $text =~ m/^Buy US War Bonds!/;
    return 0;
}

# finish_message(HANDLE, FILE, DATE, TOUCH)
#
# Closes a completed message file and sets its modification time to DATE
# by running the TOUCH program.  The list form of system() is used so that
# DATE, which comes straight out of the mailbox, is never seen by a shell.
# A touch failure is reported but is not fatal.
sub finish_message {
    my ($fh, $file, $date, $touchprog) = @_;

    close($fh) or die("Fatal: unable to finish writing $file: $!\n");

    system($touchprog, '-d', $date, $file) == 0
        or print "Touch failed with date string: $date for message: $file \n";

    return;
}

# My debugging placeholder I can put somewhere to show how far the script ran.
# die("So far so good.\n\n");