#!/usr/bin/perl -w
#
# deDuplicate.pl
# version 1.0  9 Aug, 2018
#
# The script for deleting duplicate messages in mailboxes.
# Messages are considered duplicates if their INTERNALDATE and SIZE attributes are equal.
#
#
# Mail your comments to support@communigatepro.ru
#

use strict;
use CLI;  #get one from communigatepro.ru/CGPerl


####  YOU SHOLD REDEFINE THESE VARIABLES !!!

my $CGServerAddress='127.0.0.1';  #IP or domain name;
my $Login='postmaster';
my $Password='pass';

my $InboxOnly=0; # change to 1 to scan INBOXes only 

my $compareMethod=0; # 0 - compare INTERNALDATE and SIZE attributes to consider messages as equal. Fast but won't work for RPOP messages
                     # 1 - compare using Message-ID attribute. Slow.
                   
# also un-commend one of the lines below to choose between all domains/one domain/one account

#### end of the customizeable variables list


my $imap = new IO::Socket::INET(   PeerAddr => $CGServerAddress,
                                    PeerPort => 143,
                                    Timeout  => 600
                                  ) 
   || die "*** Can't connect to CGPro via IMAP.\n";                                

$imap->autoflush(1);
my $responseLine = <$imap>;

print $imap "x LOGIN $Login $Password\015\012";
do {
  $responseLine = <$imap>;
}until($responseLine =~/^x /);
die "*** Can't login to CGPro IMAP: $responseLine.\n"
  unless($responseLine =~ /^x OK/);

print $imap "x ENABLE EXTENSIONS\015\012";
do {
  $responseLine = <$imap>;
}until($responseLine =~/^x /);



my $cli = new CGP::CLI( { PeerAddr => $CGServerAddress,
                          PeerPort => 106,
                          login    => $Login,
                          password => $Password } )
   || die "*** Can't login to CGPro CLI: ".$CGP::ERR_STRING."\n";


# un-comment one of the below lines

#processAccount('aaa@company.com');
processDomain('company.com');
#processAllDomains();
#processFile('accountList.txt');



print "Done\n";
$cli->Logout();
print $imap "x LOGOUT\015\012";
exit;

sub processAllDomains {
  my $DomainList = $cli->ListDomains()
               || die "*** Can't get the domain list: ".$cli->getErrMessage.", quitting";
  foreach(@$DomainList) {
    processDomain($_);
  }
}         

sub processDomain {
  my $domain=$_[0];
  print "Domain: $domain\n";

  my $cookie="";
  do {
    my $data=$cli->ListDomainObjects($domain,5000,undef,'ACCOUNTS',$cookie);
    unless($data) {
      print "*** Can't get accounts for $domain: ".$cli->getErrMessage."\n";
      return;
    }
    $cookie=$data->[4];
    foreach(keys %{$data->[1]} ) {
      processAccount("$_\@$domain"); 
    }
  }while($cookie ne '');
 
}

sub processFile {
  my ($fname)=@_;
  open(FILE,$fname) || die "can't open $fname: $!\n";
  while(<FILE>) {
    chomp;
    next if(/^#/);
    my $account=$_;
    if(length($account)>3) {
      processAccount($account);
    }
  }
  close(FILE);
}

sub processAccount {
  my $account=$_[0];
  #print "Account: $account\n";

  if($InboxOnly) {
    processMailbox("$account/INBOX");
    return; 
  }
  
  my $mailboxesList=$cli->ListMailboxes(accountName=>$account);  
  unless($mailboxesList) {
    print "*** Can't list mailboxes for $account:".$cli->getErrMessage."\n";
    return;
  }
  foreach(keys %$mailboxesList) {
    my $data=@$mailboxesList{$_};
    if(ref $data eq 'ARRAY') {
      $data=@$data[0];
    }   
    if(ref $data eq 'HASH') {
      my $nMessages=@$data{'Messages'};
      if(defined $nMessages && $nMessages eq 0) {
        #print "skipping empty $account/$_\n";
        next;
      }
      processMailbox("$account/$_"); 
    }
  }  
}

sub processMailbox {  
  my $mailbox=$_[0];
  my $nMessages=0;
  my %msgList;
  my @delList;
  #print "Mailbox: $mailbox\n";
  
  print $imap "x SELECT \"~$mailbox\"\015\012";
  do {
    $responseLine = <$imap>;
    if($responseLine =~ /^\* (\d*) EXISTS/) {
      $nMessages=$1;
    }
  }until($responseLine =~/^x /);
  unless($responseLine =~ /^x OK/) {
    print "*** Can't select $mailbox: $responseLine.\n";
    return;
  }
  if($nMessages < 2) {
    return;
  }

  for(my $xMsg=1;$xMsg<=$nMessages;$xMsg++) {
    my $hashKey=undef;
    if($compareMethod==0) {
      my $size=0;
      my $date='';
  
      print $imap "f FETCH $xMsg (RFC822.SIZE INTERNALDATE)\015\012";
      do {
        $responseLine = <$imap>;
        if($responseLine =~ /^\* .+ FETCH .+RFC822.SIZE (\d+)/) {
          $size=$1;
        }
        if($responseLine =~ /^\* .+ FETCH .+INTERNALDATE \"(.+)\"/) { #"
          $date=$1;
        }
      }until($responseLine =~/^f /);
      unless($responseLine =~ /^f OK/) {
        print "*** Can't fetch msg $xMsg: $responseLine.\n";
      }
      $hashKey=$date.$size;
      
    } elsif($compareMethod==1) {
      my $msgText;
      print $imap "f FETCH $xMsg (BODY.PEEK[HEADER.FIELDS (Message-ID)])\015\012";
      do {
        $responseLine = <$imap>;
        if($responseLine =~ /^\* .+ FETCH .+ \{(\d+)\}/) {
          my $msgSize=$1;
          while($msgSize>0) {
            $responseLine = <$imap>;
            $msgSize-=length($responseLine);
            local $/="\r\n";
            chomp($responseLine);
            $msgText .= $responseLine."\n";
          }
          $responseLine = <$imap>;
        }
      }until($responseLine =~/^f /);
      unless($responseLine =~ /^f OK/) {
        print "*** Can't fetch msg $xMsg: $responseLine.\n";
      }
      if($msgText=~/^Message-ID.*\<(.*)\>$/im ) {
        $hashKey=$1
      }
      
    } else {
      die "unknown compare method";
    }
    
    if(!$hashKey) {
      print "Error: unable to fetch message $xMsg from $mailbox\n";
    }elsif(exists($msgList{$hashKey})) {
      push(@delList,$xMsg);
    } else {
      $msgList{$hashKey}=1;
    }
  }

  if(@delList >0) {
    print "Mailbox: $mailbox (". scalar(@delList)." messages to delete)\n";
    #print "deletion list=$delList\n";

    #print "deleting\n";
    print $imap "s STORE ".join(',',@delList)." +FLAGS (\\Deleted)\015\012";

    do {
      $responseLine = <$imap>;
    }until($responseLine =~/^s /);
    unless($responseLine =~ /^s OK/) {
      print "*** Can't store flags for $mailbox messages: $responseLine.\n";
    }


    print $imap "c CLOSE\015\012";
    do {
      $responseLine = <$imap>;
    }until($responseLine =~/^c /);
    unless($responseLine =~ /^c OK/) {
      print "*** Can't close $mailbox: $responseLine.\n";
    }
  }
  
}

__END__
