Is there a faster way to scan through a directory recursively in .NET?
(9个答案)
已关闭6年。
我正在使用文件同步服务来在不同计算机上的两个文件夹之间同步文件。我需要找到一种枚举目录并从中提取以下信息的快速方法:
此目录内所有文件路径和子目录路径的一个或多个数据结构,包括每个文件或子目录的最后写入时间。
对于在当前目录下任意一级找到的每个子目录,与上面相同。
到目前为止,我想出了以下实现:
/// <summary>
/// Benchmark driver: repeatedly enumerates drive C: with the single-threaded
/// recursive scanner and prints how many milliseconds each full pass took.
/// Never returns.
/// </summary>
static void Main(string[] args)
{
    List<Tuple<string, DateTime>> files;
    List<Tuple<string, DateTime>> directories;
    Stopwatch stopwatch = new Stopwatch();
    for (;;)
    {
        stopwatch.Start();
        bool scanned = CheckFolderRecursiveSingleThreaded("C:\\", out files, out directories);
        while (!scanned)
        {
            // You can assume for all intents and purposes that drive C does exist and that
            // you have access to it, which will cause this sleep to not get called.
            Thread.Sleep(1000);
            scanned = CheckFolderRecursiveSingleThreaded("C:\\", out files, out directories);
        }
        stopwatch.Stop();
        Console.WriteLine(stopwatch.ElapsedMilliseconds);
        stopwatch.Reset();

        // Do something with the information.

        // Pause between passes.
        Thread.Sleep(1000);
    }
}
/// <summary>
/// Recursively enumerates <paramref name="path"/> on the calling thread.
/// </summary>
/// <param name="path">Directory to scan.</param>
/// <param name="files">Full path and UTC last-write time of every file found; null on failure.</param>
/// <param name="directories">Full path and UTC last-write time of every sub-directory found; null on failure.</param>
/// <returns>
/// true when this directory itself could be listed; false when any exception was raised
/// while listing it. Sub-directories that fail are recorded but contribute no children.
/// </returns>
static bool CheckFolderRecursiveSingleThreaded(string path, out List<Tuple<string, DateTime>> files, out List<Tuple<string, DateTime>> directories)
{
    files = null;
    directories = null;
    try
    {
        var root = new DirectoryInfo(path);

        var foundFiles = new List<Tuple<string, DateTime>>();
        foreach (FileInfo entry in root.GetFiles())
        {
            foundFiles.Add(Tuple.Create(entry.FullName, entry.LastWriteTimeUtc));
        }

        var foundDirectories = new List<Tuple<string, DateTime>>();
        foreach (DirectoryInfo child in root.GetDirectories())
        {
            // A ReparsePoint flag indicates a symbolic link; skip those entirely.
            if (child.Attributes.HasFlag(FileAttributes.ReparsePoint))
            {
                continue;
            }

            foundDirectories.Add(Tuple.Create(child.FullName, child.LastWriteTimeUtc));

            List<Tuple<string, DateTime>> nestedFiles;
            List<Tuple<string, DateTime>> nestedDirectories;
            bool nestedOk = CheckFolderRecursiveSingleThreaded(child.FullName, out nestedFiles, out nestedDirectories);
            if (nestedOk)
            {
                foundFiles.AddRange(nestedFiles);
                foundDirectories.AddRange(nestedDirectories);
            }
        }

        files = foundFiles;
        directories = foundDirectories;
        return true;
    }
    catch
    {
        // Any failure while listing this directory: report it as unreadable.
        files = null;
        directories = null;
        return false;
    }
}
在性能方面,枚举我的C:\驱动器并生成其可访问的约549,254个文件和83,235个文件夹的列表大约需要22秒(在未附加调试器的情况下,无论以Release模式还是Debug模式运行都是如此),但是
还能更快吗?我愿意接受任何建议,甚至是MSVC++方面的建议。
编辑:得益于多线程,使用LINQ的AsParallel后耗时降至约12秒(必须在Release模式下测试)。请注意,这只会并行处理C:\下的各个顶层子文件夹,递归调用仍使用上面的单线程实现;否则对每一级文件夹都进行并行化反而会耗费很长时间!
/// <summary>
/// Enumerates <paramref name="path"/> by scanning each of its immediate sub-directories in
/// parallel (PLINQ); each sub-directory is then walked with the single-threaded recursive scanner.
/// </summary>
/// <param name="path">Directory to scan.</param>
/// <param name="files">Full path and UTC last-write time of every file found; null on failure.</param>
/// <param name="directories">Full path and UTC last-write time of every sub-directory found; null on failure.</param>
/// <returns>true on success; false when any exception (including one raised on a worker thread) occurred.</returns>
/// <remarks>The order of the merged results depends on worker scheduling and is not deterministic.</remarks>
static bool CheckFolderParallelled(string path, out List<Tuple<string, DateTime>> files, out List<Tuple<string, DateTime>> directories)
{
    try
    {
        DirectoryInfo directoryInformation = new DirectoryInfo(path);
        List<Tuple<string, DateTime>> fileList = new List<Tuple<string, DateTime>>();
        foreach (FileInfo file in directoryInformation.GetFiles())
        {
            fileList.Add(new Tuple<string, DateTime>(file.FullName, file.LastWriteTimeUtc));
        }
        List<Tuple<string, DateTime>> directoryList = new List<Tuple<string, DateTime>>();

        // List<T> is not safe for concurrent writers, so every merge into the shared
        // result lists is serialized through this lock.
        object mergeLock = new object();
        directoryInformation.GetDirectories().AsParallel().ForAll(directory =>
        {
            // Check for the ReparsePoint flag, which will indicate a symbolic link.
            if (directory.Attributes.HasFlag(FileAttributes.ReparsePoint))
            {
                return;
            }

            // Do the expensive work (the recursive scan) outside the lock.
            Tuple<string, DateTime> self = new Tuple<string, DateTime>(directory.FullName, directory.LastWriteTimeUtc);
            List<Tuple<string, DateTime>> directoryFiles;
            List<Tuple<string, DateTime>> directoryFolders;
            bool scanned = CheckFolderRecursiveSingleThreaded(directory.FullName, out directoryFiles, out directoryFolders);

            lock (mergeLock)
            {
                directoryList.Add(self);
                if (scanned)
                {
                    fileList.AddRange(directoryFiles);
                    directoryList.AddRange(directoryFolders);
                }
            }
        });
        files = fileList;
        directories = directoryList;
        return true;
    }
    catch
    {
        files = null;
        directories = null;
        return false;
    }
}
编辑:采用Alexei所给链接中Marc Gravell的已采纳答案的方案,仍需约21秒。这种非递归技术并不是最快的(维护这个Queue数据结构的开销,可能与在调用栈上压入和弹出该方法调用的开销一样大):
/// <summary>
/// Enumerates <paramref name="path"/> without recursion, using a queue of
/// directories that still have to be listed.
/// </summary>
/// <param name="path">Directory to scan.</param>
/// <param name="files">Full path and UTC last-write time of every file found; null on failure.</param>
/// <param name="directories">Full path and UTC last-write time of every sub-directory found; null on failure.</param>
/// <returns>
/// true unless an exception escaped the work loop (for example while creating the initial
/// DirectoryInfo); directories that cannot be listed are silently skipped.
/// </returns>
static bool CheckFolderNonRecursive(string path, out List<Tuple<string, DateTime>> files, out List<Tuple<string, DateTime>> directories)
{
    files = null;
    directories = null;
    try
    {
        var collectedFiles = new List<Tuple<string, DateTime>>();
        var collectedDirectories = new List<Tuple<string, DateTime>>();

        var workQueue = new ConcurrentQueue<DirectoryInfo>();
        workQueue.Enqueue(new DirectoryInfo(path));

        DirectoryInfo current;
        while (workQueue.TryDequeue(out current))
        {
            try
            {
                foreach (FileInfo entry in current.GetFiles())
                {
                    collectedFiles.Add(Tuple.Create(entry.FullName, entry.LastWriteTimeUtc));
                }

                foreach (DirectoryInfo child in current.GetDirectories())
                {
                    // A ReparsePoint flag indicates a symbolic link; do not record or follow it.
                    if (child.Attributes.HasFlag(FileAttributes.ReparsePoint))
                    {
                        continue;
                    }
                    collectedDirectories.Add(Tuple.Create(child.FullName, child.LastWriteTimeUtc));
                    workQueue.Enqueue(child);
                }
            }
            catch
            {
                // Ignore directories with no access rights.
            }
        }

        files = collectedFiles;
        directories = collectedDirectories;
        return true;
    }
    catch
    {
        files = null;
        directories = null;
        return false;
    }
}
编辑:这个问题并不局限于.NET,因为使用MSVC++库(例如boost)也许有更快的方法,但我还没有遇到过。如果有人能用C++写出一个提取相同数据、比我的C#方法更快的C盘枚举器,首先向您致敬,其次我真的很想看到它,第三它会帮助很多人(不仅是我自己)。我用boost做了如下尝试,却发现下面的方法大约需要200,000毫秒,比我上面发布的任何代码都慢得多:
#include "stdafx.h"
#include <iostream>
#include <Windows.h>
#include <boost/filesystem.hpp>
#include <boost/foreach.hpp>
#include <boost/timer.hpp>
namespace fs = boost::filesystem;
bool IterateDirectory(const wchar_t *directory);
int _tmain(int argc, _TCHAR* argv[])
{
boost::timer timer = boost::timer();
while (true)
{
timer.restart();
// L makes it wide, since IterateDirectory takes wchar_t.
// R makes it a raw string literal, which tells the compiler to parse the string as-is, not escape characters and fancy tricks.
IterateDirectory(LR"(C:\)");
std::cout << "Elapsed time: " << timer.elapsed() * 1000 << " ms" << std::endl;
Sleep(1000);
}
return 0;
}
// IterateDirectory takes wchar_t because path.c_str() always returns wchar_t whether you are using unicode or multibyte.
bool IterateDirectory(const wchar_t *directory)
{
if (boost::filesystem::exists(directory))
{
fs::directory_iterator it(directory), eod;
BOOST_FOREACH(fs::path path, std::make_pair(it, eod))
{
try
{
if (is_regular_file(path))
{
//std::cout << path << ", last write time: " << last_write_time(path) << '.' << std::endl;
}
if (is_directory(path))
{
//std::cout << path << ", last write time: " << last_write_time(path) << '.' << std::endl;
// path.c_str() always returns wchar_t, whether you are using unicode or multibyte. This is probably because of multi-language support inside of the Windows operating system and file structure.
IterateDirectory(path.c_str());
}
}
catch (...) { } // Ignore directories we don't have access to.
}
return true;
}
return false;
}
编辑:通过PInvoke调用FindFirstFile和FindNextFile,遍历我的整个C驱动器大约需要6秒(感谢重复问题的链接和Sam Saffron的回答)。但是...
还能更快吗?
// P/Invoke declarations for the kernel32.dll directory enumeration functions.

// Starts a search for entries matching lpFileName (this file passes "<dir>\*") and
// fills lpFindFileData with the first match. The callers in this file compare the
// returned handle against INVALID_HANDLE_VALUE to detect failure.
// SetLastError = true preserves the native error code for Marshal.GetLastWin32Error().
[DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
public static extern IntPtr FindFirstFileW(string lpFileName, out WIN32_FIND_DATAW lpFindFileData);
// Advances the search identified by hFindFile and fills lpFindFileData with the next
// entry. The callers in this file loop until it returns false.
[DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
public static extern bool FindNextFile(IntPtr hFindFile, out WIN32_FIND_DATAW lpFindFileData);
// Closes a search handle obtained from FindFirstFileW.
[DllImport("kernel32.dll")]
public static extern bool FindClose(IntPtr hFindFile);
// Managed counterpart of the native WIN32_FIND_DATAW structure filled in by
// FindFirstFileW / FindNextFile. Because the layout is sequential, the order and
// size of the fields below must not be changed.
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Unicode)]
public struct WIN32_FIND_DATAW {
// Attribute flags of the entry (tested in this file for Directory and ReparsePoint).
public FileAttributes dwFileAttributes;
internal System.Runtime.InteropServices.ComTypes.FILETIME ftCreationTime;
internal System.Runtime.InteropServices.ComTypes.FILETIME ftLastAccessTime;
// Last-write timestamp; converted with the ToDateTime() extension in this file.
internal System.Runtime.InteropServices.ComTypes.FILETIME ftLastWriteTime;
public int nFileSizeHigh;
public int nFileSizeLow;
public int dwReserved0;
public int dwReserved1;
// Entry name without its directory, marshalled as a fixed 260-character inline buffer.
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 260)]
public string cFileName;
// Alternate name, marshalled as a fixed 14-character inline buffer
// (presumably the 8.3 short name - see the Win32 documentation).
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 14)]
public string cAlternateFileName;
}
// Sentinel handle value (-1) that the callers in this file compare against the result
// of FindFirstFileW to detect that no search handle was obtained.
static IntPtr INVALID_HANDLE_VALUE = new IntPtr(-1);
/// <summary>
/// Recursively enumerates <paramref name="path"/> through the native
/// FindFirstFileW / FindNextFile API.
/// </summary>
/// <param name="path">Directory to scan, with or without a trailing backslash.</param>
/// <param name="files">Full path and UTC last-write time of every file found; null on failure.</param>
/// <param name="directories">Full path and UTC last-write time of every sub-directory found; null on failure.</param>
/// <returns>
/// true when the scan completed (a directory that yields no search handle simply
/// produces empty lists); false when an exception was raised, which is also written to the console.
/// </returns>
static bool FindNextFilePInvokeRecursive(string path, out List<Tuple<string, DateTime>> files, out List<Tuple<string, DateTime>> directories)
{
    List<Tuple<string, DateTime>> fileList = new List<Tuple<string, DateTime>>();
    List<Tuple<string, DateTime>> directoryList = new List<Tuple<string, DateTime>>();
    IntPtr findHandle = INVALID_HANDLE_VALUE;
    try
    {
        // Normalize once so the search pattern and every child path use exactly one separator.
        string prefix = path.EndsWith("\\", StringComparison.Ordinal) ? path : path + "\\";

        WIN32_FIND_DATAW findData;
        findHandle = FindFirstFileW(prefix + "*", out findData);
        if (findHandle != INVALID_HANDLE_VALUE)
        {
            do
            {
                // Skip the current-directory and parent-directory entries.
                if (findData.cFileName == "." || findData.cFileName == "..") continue;

                string fullPath = prefix + findData.cFileName;
                DateTime lastWrite = findData.ftLastWriteTime.ToDateTime();

                // Plain bit tests instead of Enum.HasFlag: this runs once per entry on the whole drive.
                bool isDirectory = (findData.dwFileAttributes & FileAttributes.Directory) != 0;
                bool isReparsePoint = (findData.dwFileAttributes & FileAttributes.ReparsePoint) != 0;

                if (!isDirectory)
                {
                    fileList.Add(new Tuple<string, DateTime>(fullPath, lastWrite));
                }
                else if (!isReparsePoint)
                {
                    // Directory symbolic links are neither recorded nor followed: they could lead
                    // to repeated files and folders as well as infinite loops.
                    directoryList.Add(new Tuple<string, DateTime>(fullPath, lastWrite));
                    List<Tuple<string, DateTime>> subDirectoryFileList;
                    List<Tuple<string, DateTime>> subDirectoryDirectoryList;
                    if (FindNextFilePInvokeRecursive(fullPath, out subDirectoryFileList, out subDirectoryDirectoryList))
                    {
                        fileList.AddRange(subDirectoryFileList);
                        directoryList.AddRange(subDirectoryDirectoryList);
                    }
                }
            }
            while (FindNextFile(findHandle, out findData));
        }
        files = fileList;
        directories = directoryList;
        return true;
    }
    catch (Exception exception)
    {
        Console.WriteLine("Caught exception while trying to enumerate a directory. {0}", exception.ToString());
        files = null;
        directories = null;
        return false;
    }
    finally
    {
        // Single release point for the search handle on both the success and failure paths.
        if (findHandle != INVALID_HANDLE_VALUE) FindClose(findHandle);
    }
}
/// <summary>Conversion helpers for the interop FILETIME structure.</summary>
public static class FILETIMEExtensions
{
    /// <summary>
    /// Converts a FILETIME (two 32-bit halves of a 64-bit tick count) to a UTC DateTime.
    /// </summary>
    /// <param name="filetime">The value to convert.</param>
    /// <returns>The result of DateTime.FromFileTimeUtc for the combined 64-bit value.</returns>
    public static DateTime ToDateTime(this System.Runtime.InteropServices.ComTypes.FILETIME filetime)
    {
        // Both halves are declared as int but hold unsigned 32-bit data. Going through uint
        // prevents sign extension; otherwise any low word with bit 31 set would be
        // treated as negative and make the result 2^32 ticks (about 7 minutes) too early.
        ulong highBits = (uint)filetime.dwHighDateTime;
        ulong lowBits = (uint)filetime.dwLowDateTime;
        return DateTime.FromFileTimeUtc((long)((highBits << 32) | lowBits));
    }
}
编辑:是的,它可以更快。通过对目标文件夹的各个子目录的递归进行并行化,并结合上面的FindNextFilePInvokeRecursive方法,我把耗时降到了4秒。也就是说,
只需4秒就能遍历整个C驱动器并取得所需的数据。我在进程监视器中看到,CPU占用最多只有30%,磁盘占用最多只有1%,这让我有点奇怪,不清楚原因,也许是这种类似链表遍历的方式使得开销几乎可以忽略不计。理想情况下它应该能吃满100%的CPU,但这可能取决于被并行处理的子文件夹的数量和深度。
但是还能更快吗?
/// <summary>
/// Enumerates <paramref name="path"/> through the native find API, then scans each of its
/// immediate sub-directories in parallel with FindNextFilePInvokeRecursive.
/// </summary>
/// <param name="path">Directory to scan, with or without a trailing backslash.</param>
/// <param name="files">Full path and UTC last-write time of every file found; null on failure.</param>
/// <param name="directories">Full path and UTC last-write time of every sub-directory found; null on failure.</param>
/// <returns>true on success; false when an exception (including one from a worker) was raised, which is also written to the console.</returns>
/// <remarks>The order of the merged results depends on worker scheduling and is not deterministic.</remarks>
static bool FindNextFilePInvokeRecursiveParalleled(string path, out List<Tuple<string, DateTime>> files, out List<Tuple<string, DateTime>> directories)
{
    List<Tuple<string, DateTime>> fileList = new List<Tuple<string, DateTime>>();
    List<Tuple<string, DateTime>> directoryList = new List<Tuple<string, DateTime>>();
    IntPtr findHandle = INVALID_HANDLE_VALUE;
    try
    {
        // Normalize once so the search pattern and every child path use exactly one separator.
        string prefix = path.EndsWith("\\", StringComparison.Ordinal) ? path : path + "\\";

        WIN32_FIND_DATAW findData;
        findHandle = FindFirstFileW(prefix + "*", out findData);
        if (findHandle != INVALID_HANDLE_VALUE)
        {
            do
            {
                // Skip the current-directory and parent-directory entries.
                if (findData.cFileName == "." || findData.cFileName == "..") continue;

                string fullPath = prefix + findData.cFileName;
                DateTime lastWrite = findData.ftLastWriteTime.ToDateTime();
                bool isDirectory = (findData.dwFileAttributes & FileAttributes.Directory) != 0;
                bool isReparsePoint = (findData.dwFileAttributes & FileAttributes.ReparsePoint) != 0;

                if (!isDirectory)
                {
                    fileList.Add(new Tuple<string, DateTime>(fullPath, lastWrite));
                }
                else if (!isReparsePoint)
                {
                    // Directory symbolic links are skipped: they could lead to repeated
                    // files and folders as well as infinite loops.
                    directoryList.Add(new Tuple<string, DateTime>(fullPath, lastWrite));
                }
            }
            while (FindNextFile(findHandle, out findData));

            // Iterate over a snapshot: the workers append to directoryList, and a List<T>
            // must not be modified while it is being enumerated.
            Tuple<string, DateTime>[] topLevelDirectories = directoryList.ToArray();

            // List<T> is not safe for concurrent writers; serialize every merge.
            object mergeLock = new object();
            topLevelDirectories.AsParallel().ForAll(x =>
            {
                List<Tuple<string, DateTime>> subDirectoryFileList;
                List<Tuple<string, DateTime>> subDirectoryDirectoryList;
                if (FindNextFilePInvokeRecursive(x.Item1, out subDirectoryFileList, out subDirectoryDirectoryList))
                {
                    lock (mergeLock)
                    {
                        fileList.AddRange(subDirectoryFileList);
                        directoryList.AddRange(subDirectoryDirectoryList);
                    }
                }
            });
        }
        files = fileList;
        directories = directoryList;
        return true;
    }
    catch (Exception exception)
    {
        Console.WriteLine("Caught exception while trying to enumerate a directory. {0}", exception.ToString());
        files = null;
        directories = null;
        return false;
    }
    finally
    {
        // Single release point for the search handle on both the success and failure paths.
        if (findHandle != INVALID_HANDLE_VALUE) FindClose(findHandle);
    }
}
编辑:之前使用并行时忘记添加并发锁,不加锁可能会引发异常。另外出于我的需要,去掉了元组,改用FileInformation/DirectoryInformation类。这又缩短了0.5秒。现在
只需3.5秒即可枚举我的C:驱动器。
// P/Invoke declarations for the kernel32.dll directory enumeration functions.

// Starts a search for entries matching lpFileName (this file passes "<dir>\*") and
// fills lpFindFileData with the first match. The callers in this file compare the
// returned handle against INVALID_HANDLE_VALUE to detect failure.
// SetLastError = true preserves the native error code for Marshal.GetLastWin32Error().
[DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
public static extern IntPtr FindFirstFileW(string lpFileName, out WIN32_FIND_DATAW lpFindFileData);
// Advances the search identified by hFindFile and fills lpFindFileData with the next
// entry. The callers in this file loop until it returns false.
[DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
public static extern bool FindNextFile(IntPtr hFindFile, out WIN32_FIND_DATAW lpFindFileData);
// Closes a search handle obtained from FindFirstFileW.
[DllImport("kernel32.dll")]
public static extern bool FindClose(IntPtr hFindFile);
// Managed counterpart of the native WIN32_FIND_DATAW structure filled in by
// FindFirstFileW / FindNextFile. Because the layout is sequential, the order and
// size of the fields below must not be changed.
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Unicode)]
public struct WIN32_FIND_DATAW {
// Attribute flags of the entry (tested in this file for Directory and ReparsePoint).
public FileAttributes dwFileAttributes;
internal System.Runtime.InteropServices.ComTypes.FILETIME ftCreationTime;
internal System.Runtime.InteropServices.ComTypes.FILETIME ftLastAccessTime;
// Last-write timestamp; converted with the ToDateTime() extension in this file.
internal System.Runtime.InteropServices.ComTypes.FILETIME ftLastWriteTime;
public int nFileSizeHigh;
public int nFileSizeLow;
public int dwReserved0;
public int dwReserved1;
// Entry name without its directory, marshalled as a fixed 260-character inline buffer.
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 260)]
public string cFileName;
// Alternate name, marshalled as a fixed 14-character inline buffer
// (presumably the 8.3 short name - see the Win32 documentation).
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 14)]
public string cAlternateFileName;
}
// Sentinel handle value (-1) that the callers in this file compare against the result
// of FindFirstFileW to detect that no search handle was obtained.
static IntPtr INVALID_HANDLE_VALUE = new IntPtr(-1);
/// <summary>
/// Recursively enumerates <paramref name="path"/> through the native
/// FindFirstFileW / FindNextFile API.
/// </summary>
/// <param name="path">Directory to scan, with or without a trailing backslash.</param>
/// <param name="files">Every file found, with full path and UTC last-write time; null on failure.</param>
/// <param name="directories">Every sub-directory found, with full path and UTC last-write time; null on failure.</param>
/// <returns>
/// true when the scan completed (a directory that yields no search handle simply
/// produces empty lists); false when an exception was raised, which is also written to the console.
/// </returns>
static bool FindNextFilePInvokeRecursive(string path, out List<FileInformation> files, out List<DirectoryInformation> directories)
{
    List<FileInformation> fileList = new List<FileInformation>();
    List<DirectoryInformation> directoryList = new List<DirectoryInformation>();
    IntPtr findHandle = INVALID_HANDLE_VALUE;
    try
    {
        // Normalize once so a path such as "C:\" does not produce "C:\\name" in the results.
        string prefix = path.EndsWith(@"\", StringComparison.Ordinal) ? path : path + @"\";

        WIN32_FIND_DATAW findData;
        findHandle = FindFirstFileW(prefix + "*", out findData);
        if (findHandle != INVALID_HANDLE_VALUE)
        {
            do
            {
                // Skip current directory and parent directory symbols that are returned.
                if (findData.cFileName == "." || findData.cFileName == "..") continue;

                string fullPath = prefix + findData.cFileName;
                DateTime lastWrite = findData.ftLastWriteTime.ToDateTime();

                // Plain bit tests instead of Enum.HasFlag: this runs once per entry on the whole drive.
                bool isDirectory = (findData.dwFileAttributes & FileAttributes.Directory) != 0;
                bool isReparsePoint = (findData.dwFileAttributes & FileAttributes.ReparsePoint) != 0;

                if (!isDirectory)
                {
                    fileList.Add(new FileInformation { FullPath = fullPath, LastWriteTime = lastWrite });
                }
                else if (!isReparsePoint)
                {
                    // Directory symbolic links are neither recorded nor followed: they could lead
                    // to repeated files and folders as well as infinite loops.
                    directoryList.Add(new DirectoryInformation { FullPath = fullPath, LastWriteTime = lastWrite });
                    List<FileInformation> subDirectoryFileList;
                    List<DirectoryInformation> subDirectoryDirectoryList;
                    if (FindNextFilePInvokeRecursive(fullPath, out subDirectoryFileList, out subDirectoryDirectoryList))
                    {
                        fileList.AddRange(subDirectoryFileList);
                        directoryList.AddRange(subDirectoryDirectoryList);
                    }
                }
            }
            while (FindNextFile(findHandle, out findData));
        }
        files = fileList;
        directories = directoryList;
        return true;
    }
    catch (Exception exception)
    {
        Console.WriteLine("Caught exception while trying to enumerate a directory. {0}", exception.ToString());
        files = null;
        directories = null;
        return false;
    }
    finally
    {
        // Single release point for the search handle on both the success and failure paths.
        if (findHandle != INVALID_HANDLE_VALUE) FindClose(findHandle);
    }
}
/// <summary>
/// Enumerates <paramref name="path"/> through the native find API, then scans each of its
/// immediate sub-directories in parallel with FindNextFilePInvokeRecursive.
/// </summary>
/// <param name="path">Directory to scan, with or without a trailing backslash.</param>
/// <param name="files">Every file found, with full path and UTC last-write time; null on failure.</param>
/// <param name="directories">Every sub-directory found, with full path and UTC last-write time; null on failure.</param>
/// <returns>true on success; false when an exception (including one from a worker) was raised, which is also written to the console.</returns>
/// <remarks>The order of the merged results depends on worker scheduling and is not deterministic.</remarks>
static bool FindNextFilePInvokeRecursiveParalleled(string path, out List<FileInformation> files, out List<DirectoryInformation> directories)
{
    List<FileInformation> fileList = new List<FileInformation>();
    object fileListLock = new object();
    List<DirectoryInformation> directoryList = new List<DirectoryInformation>();
    object directoryListLock = new object();
    IntPtr findHandle = INVALID_HANDLE_VALUE;
    try
    {
        // Normalize once so the search pattern and every child path use exactly one separator.
        string prefix = path.EndsWith(@"\", StringComparison.Ordinal) ? path : path + @"\";

        WIN32_FIND_DATAW findData;
        findHandle = FindFirstFileW(prefix + "*", out findData);
        if (findHandle != INVALID_HANDLE_VALUE)
        {
            do
            {
                // Skip current directory and parent directory symbols that are returned.
                if (findData.cFileName == "." || findData.cFileName == "..") continue;

                string fullPath = prefix + findData.cFileName;
                DateTime lastWrite = findData.ftLastWriteTime.ToDateTime();
                bool isDirectory = (findData.dwFileAttributes & FileAttributes.Directory) != 0;
                bool isReparsePoint = (findData.dwFileAttributes & FileAttributes.ReparsePoint) != 0;

                if (!isDirectory)
                {
                    fileList.Add(new FileInformation { FullPath = fullPath, LastWriteTime = lastWrite });
                }
                else if (!isReparsePoint)
                {
                    // Directory symbolic links are skipped: they could lead to repeated
                    // files and folders as well as infinite loops.
                    directoryList.Add(new DirectoryInformation { FullPath = fullPath, LastWriteTime = lastWrite });
                }
            }
            while (FindNextFile(findHandle, out findData));

            // Iterate over a snapshot: the workers append to directoryList, and a List<T>
            // must not be modified (even under a lock) while another thread enumerates it.
            DirectoryInformation[] topLevelDirectories = directoryList.ToArray();
            topLevelDirectories.AsParallel().ForAll(x =>
            {
                List<FileInformation> subDirectoryFileList;
                List<DirectoryInformation> subDirectoryDirectoryList;
                if (FindNextFilePInvokeRecursive(x.FullPath, out subDirectoryFileList, out subDirectoryDirectoryList))
                {
                    lock (fileListLock)
                    {
                        fileList.AddRange(subDirectoryFileList);
                    }
                    lock (directoryListLock)
                    {
                        directoryList.AddRange(subDirectoryDirectoryList);
                    }
                }
            });
        }
        files = fileList;
        directories = directoryList;
        return true;
    }
    catch (Exception exception)
    {
        Console.WriteLine("Caught exception while trying to enumerate a directory. {0}", exception.ToString());
        files = null;
        directories = null;
        return false;
    }
    finally
    {
        // Single release point for the search handle on both the success and failure paths.
        if (findHandle != INVALID_HANDLE_VALUE) FindClose(findHandle);
    }
}
// Result record for one file found by the enumeration methods.
public class FileInformation
{
// Full path of the file (parent directory plus entry name).
public string FullPath;
// Last-write time of the file; the enumeration methods in this file fill it from the
// native FILETIME through DateTime.FromFileTimeUtc, i.e. as a UTC value.
public DateTime LastWriteTime;
}
// Result record for one directory found by the enumeration methods.
public class DirectoryInformation
{
// Full path of the directory (parent directory plus entry name).
public string FullPath;
// Last-write time of the directory; the enumeration methods in this file fill it from
// the native FILETIME through DateTime.FromFileTimeUtc, i.e. as a UTC value.
public DateTime LastWriteTime;
}
编辑:B.K.询问有关从FILETIME转换为DateTime的问题:
/// <summary>Conversion helpers for the interop FILETIME structure.</summary>
public static class FILETIMEExtensions
{
    /// <summary>
    /// Converts a FILETIME (two 32-bit halves of a 64-bit tick count) to a UTC DateTime.
    /// </summary>
    /// <param name="time">The value to convert.</param>
    /// <returns>The result of DateTime.FromFileTimeUtc for the combined 64-bit value.</returns>
    public static DateTime ToDateTime(this System.Runtime.InteropServices.ComTypes.FILETIME time)
    {
        // The halves are declared as int but hold unsigned 32-bit data. Casting an int
        // straight to ulong sign-extends it, so a low word with bit 31 set would make the
        // result 2^32 ticks (about 7 minutes) too early. Go through uint to zero-extend.
        ulong high = (uint)time.dwHighDateTime;
        ulong low = (uint)time.dwLowDateTime;
        long fileTime = (long)((high << 32) | low);
        return DateTime.FromFileTimeUtc(fileTime);
    }
}
我是一名优秀的程序员,十分优秀!