根据md5判断文件是否相同,重复的将被删除,只保留最先扫描的文件,可设置多个文件夹进行比较,将依次扫描;代码很简陋,性能一般,但基本够用了。
array(),'refile'=>array());
// Scan every configured folder in the order given; all scans share the same
// $arr state, so a file only counts as "first seen" once across all folders.
foreach ($dirs as $dir) {
    tree($arr, $dir);
}

// Summary: number of entries collected in $arr['file'] and in $arr['refile'].
echo 'scan files: ', count($arr['file']), "\r\n";
echo 'delete duplicate files: ', count($arr['refile']), "\r\n";
/**
 * Recursively walks a directory, recording each file's MD5 digest and
 * deleting every file whose digest matches a file recorded earlier.
 *
 * Output: prints the path of every newly recorded file, and
 * "duplicate <path>" (plus "delete success" when unlink() succeeds) for every
 * duplicate.
 *
 * @param array       $arr       Scan state, modified in place. 'file' collects
 *                               the kept files and 'refile' the duplicates;
 *                               every entry is array('path' => ..., 'md5' => ...).
 * @param string      $root      Top directory of this scan (trailing '/' is stripped).
 * @param string|null $directory Directory currently being read; null means $root.
 * @param string      $dir_name  Path of $directory relative to $root, with a
 *                               leading '/' ('' for $root itself).
 */
function tree(&$arr, $root, $directory = null, $dir_name='')
{
    $root = rtrim($root, '/');
    if ($directory === null) {
        $directory = $root;
    }

    // Tolerate a caller that passes an array without the two result lists.
    if (!isset($arr['file'])) {
        $arr['file'] = array();
    }
    if (!isset($arr['refile'])) {
        $arr['refile'] = array();
    }

    $mydir = dir($directory);
    if ($mydir === false) {
        // dir() has already raised a warning; skip this directory instead of
        // crashing on a method call on false.
        return;
    }

    // Compare against false explicitly: an entry literally named "0" is a
    // falsy string and would otherwise end the loop early.
    while (false !== ($file = $mydir->read())) {
        if ($file === '.' || $file === '..') {
            continue;
        }

        if (is_dir("$directory/$file")) {
            tree($arr, $root, "$directory/$file", "$dir_name/$file");
            continue;
        }

        $path = "$root$dir_name/$file";
        $md5 = md5_file($path);
        if ($md5 === false) {
            // Unreadable file: without a digest it must never be treated as a
            // duplicate (false would "match" any other unreadable file).
            continue;
        }

        $original = tree_find_recorded_path($arr['file'], $md5);
        if ($original === null) {
            echo $path . "\r\n";
            $arr['file'][] = array('path'=>$path,'md5'=>$md5);
            continue;
        }

        if (tree_is_same_file($original, $path)) {
            // The very same file was reached again (overlapping or repeated
            // scan roots). Deleting it would destroy the only copy.
            continue;
        }

        echo "duplicate $path\r\n";
        if (unlink($path)) {
            echo 'delete success' . "\r\n";
        }
        // Recorded whether or not the deletion succeeded, as before; a failed
        // unlink() reports its own warning.
        $arr['refile'][] = array('path'=>$path,'md5'=>$md5);
    }
    $mydir->close();
}

/**
 * Returns the path of the first recorded entry whose digest is exactly $md5,
 * or null when no entry matches.
 */
function tree_find_recorded_path($files, $md5)
{
    foreach ($files as $entry) {
        // Strict comparison: loose == treats digests such as "0e123..." as numbers.
        if (isset($entry['md5'], $entry['path']) && $entry['md5'] === $md5) {
            return $entry['path'];
        }
    }
    return null;
}

/**
 * Tells whether two paths resolve to the same canonical location on disk.
 */
function tree_is_same_file($first, $second)
{
    $resolvedFirst = realpath($first);
    $resolvedSecond = realpath($second);
    return $resolvedFirst !== false && $resolvedFirst === $resolvedSecond;
}
/**
 * Reports whether any recorded entry already carries the given MD5 digest.
 *
 * @param array  $arr List of entries, each an array with an 'md5' key.
 * @param string $val Digest to look for.
 * @return bool True when an entry with exactly this digest exists.
 */
function has($arr, $val){
    if (!is_array($arr)) {
        return false;
    }
    foreach ($arr as $a) {
        // Strict comparison is required: with ==, two different digests that
        // both look like numbers (e.g. "0e4620..." and "0e8304...") are
        // compared numerically and considered equal.
        if (isset($a['md5']) && $a['md5'] === $val) {
            return true;
        }
    }
    return false;
}
运行结果:
...
e:/元器件资料/ABB/低压/双电源CB级.pdf
duplicate e:/元器件资料/ABB/低压/双电源PC级.pdf
delete success
e:/元器件资料/ABB/低压/塑壳断路器.pdf
e:/元器件资料/ABB/低压/开关外型尺寸和电路图2002.pdf
e:/元器件资料/ABB/低压/微型断路器等.pdf
e:/元器件资料/ABB/低压/微断__S800.pdf
e:/元器件资料/ABB/低压/电机控制与保护产品.pdf
e:/元器件资料/ABB/低压/空气断路器Emax X1.pdf
e:/元器件资料/ABB/低压/空气断路器Emax.pdf
scan files: 574
delete duplicate files: 12