根据文件内容的 MD5 值判断文件是否相同:重复的文件会被直接删除,只保留最先扫描到的那一份。可以设置多个文件夹进行比较,程序会按顺序依次扫描。代码很简陋,性能一般,但基本够用了。

<?php
// NOTE(review): the beginning of this script was lost when the page was
// extracted (the PHP open tag and everything up to the first "=>" were
// stripped as if they were an HTML tag). The lines below are a reconstruction.
// The directory list is only an example inferred from the sample output --
// replace it with the folders you want to compare.
// Folders are scanned in order; the first copy found of each file is kept.
$dirs = array('e:/元器件资料');

// 'file'   => entries for files that were kept (first occurrence of each MD5)
// 'refile' => entries for files that were detected as duplicates
$arr = array('file'=>array(),'refile'=>array());
foreach($dirs as $dir){
    tree($arr, $dir);
}

// Summary: number of distinct files kept, then number of duplicates found.
echo 'scan files: ' . count($arr['file']) . "\r\n";
echo 'delete duplicate files: ' . count($arr['refile']) . "\r\n";

/**
 * Recursively walk a directory tree, compute the MD5 of every file and delete
 * each file whose MD5 has already been recorded.
 *
 * @param array       $arr       Accumulator, modified in place. $arr['file'] receives
 *                               array('path'=>..., 'md5'=>...) for every kept file and
 *                               $arr['refile'] the same for every duplicate found.
 * @param string      $root      Root folder of the scan (trailing "/" is removed).
 * @param string|null $directory Directory currently being listed; defaults to $root.
 * @param string      $dir_name  Path of $directory relative to $root, with a leading "/".
 */
function tree(&$arr, $root, $directory = null, $dir_name='') 
{
    $root = rtrim($root,'/');
    if($directory === null){
        $directory = $root;
    }
    $mydir = dir($directory);
    if($mydir === false){
        // Unreadable or missing directory: report it and carry on with the rest of the scan.
        echo "cannot open $directory\r\n";
        return;
    }
    // Compare against false explicitly: an entry named "0" is falsy and would
    // otherwise end the listing early.
    while(false !== ($file = $mydir->read()))
    {
        if($file === "." || $file === ".."){
            continue;
        }
        if(is_dir("$directory/$file"))
        {
            tree($arr, $root, "$directory/$file", "$dir_name/$file");
            continue;
        }

        $path = "$root$dir_name/$file";
        $md5 = md5_file($path);
        if($md5 === false){
            // Without this guard every unreadable file would share the "hash"
            // false and all but the first would be deleted as duplicates.
            echo "skip unreadable $path\r\n";
            continue;
        }

        if(has($arr['file'], $md5)){
            echo "duplicate $path\r\n";
            if(unlink($path)){
                echo 'delete success' . "\r\n";
            }
            else{
                echo 'delete failed' . "\r\n";
            }
            $arr['refile'][] = array('path'=>$path,'md5'=>$md5);
        }
        else{
            echo $path . "\r\n";
            $arr['file'][] = array('path'=>$path,'md5'=>$md5);
        }
    }
    $mydir->close();
}

/**
 * Check whether an MD5 hash is already present in a list of file entries.
 *
 * @param array  $arr List of entries shaped like array('path'=>..., 'md5'=>...).
 * @param string $val MD5 hash to look for.
 * @return bool True when some entry has exactly this hash.
 */
function has($arr, $val){
    foreach($arr as $a){
        // Strict comparison is required: with "==" two different hashes that
        // both look like "0e<digits>" are compared as numbers (0 == 0) and a
        // non-duplicate file would be reported as a duplicate.
        if($a['md5'] === $val){
            return true;
        }
    }
    return false;
}

运行结果:

...
e:/元器件资料/ABB/低压/双电源CB级.pdf
duplicate e:/元器件资料/ABB/低压/双电源PC级.pdf
delete success
e:/元器件资料/ABB/低压/塑壳断路器.pdf
e:/元器件资料/ABB/低压/开关外型尺寸和电路图2002.pdf
e:/元器件资料/ABB/低压/微型断路器等.pdf
e:/元器件资料/ABB/低压/微断__S800.pdf
e:/元器件资料/ABB/低压/电机控制与保护产品.pdf
e:/元器件资料/ABB/低压/空气断路器Emax X1.pdf
e:/元器件资料/ABB/低压/空气断路器Emax.pdf
scan files: 574
delete duplicate files: 12