C# Alternative to Directory.GetFiles()
A while ago I was working with directories with thousands of files, and wanted to get only the first few files in the directory.
Directory.GetFiles() always returns the entire directory, and so is rather slow on huge directories.
I found and used this excellent post with a managed C++ class that does exactly what I wanted. One caveat with that code is that it didn’t work on 64-bit machines when run in 64-bit mode (instead of in WOW64 compatibility mode). Today I finally got the time to translate it to C#, so we can compile it as part of our solution instead of referencing a pre-built C++ DLL. I’ve also included a small wrapper, DirectoryUtils, which provides an easier way to get a list of N files in a directory (a simple API wrapper), plus an additional method to get a random selection of N files from that directory.
using System;
using System.Collections;
using System.Collections.Generic;
using System.ComponentModel;
using System.IO;
using System.Runtime.InteropServices;
using Microsoft.Win32.SafeHandles;
using NUnit.Framework;
using FILETIME = System.Runtime.InteropServices.ComTypes.FILETIME;
using IEnumerator = System.Collections.IEnumerator;

namespace Semingo.IntegrationTests.DirectoryUtils
{
    /// <summary>
    /// Helpers for listing a bounded number of files from a directory.
    /// </summary>
    public static class DirectoryUtils
    {
        // One shared generator: a time-seeded "new Random()" per call can yield the
        // same sequence for calls made in quick succession.
        private static readonly Random _random = new Random();
        private static readonly object _randomLock = new object();

        /// <summary>
        /// Efficiently get only some files from a dir. Enumeration stops as soon as
        /// <paramref name="limit"/> files have been collected.
        /// </summary>
        /// <param name="directory">Directory to list (not recursive).</param>
        /// <param name="limit">Maximum number of files to return; must be positive.</param>
        /// <returns>Full paths of up to <paramref name="limit"/> files; sub-directories are skipped.</returns>
        /// <exception cref="Win32Exception">The underlying FindFirstFile/FindNextFile call failed.</exception>
        public static List<string> GetFiles(string directory, int limit)
        {
            Assert.IsTrue(limit > 0, "Limit should be positive");

            List<string> result = new List<string>(limit);
            FilesFinder filesFinder = new FilesFinder(Path.Combine(directory, "*.*"));
            foreach (FoundFileData foundFile in filesFinder)
            {
                // The find data already says whether the entry is a directory
                // (this also skips "." and ".."), so no extra File.Exists call is needed.
                if ((foundFile.Attributes & FileAttributes.Directory) != 0)
                {
                    continue;
                }

                result.Add(Path.Combine(directory, foundFile.FileName));
                if (result.Count == limit)
                {
                    break;
                }
            }

            return result;
        }

        /// <summary>
        /// Return a random set of <paramref name="limit"/> files from a directory.
        /// (note, this can be slow on huge directories, because the whole directory is listed first!)
        /// </summary>
        /// <remarks>
        /// If the directory has fewer files than requested, all of them are returned.
        /// </remarks>
        /// <param name="directory">Directory to list (not recursive).</param>
        /// <param name="limit">Maximum number of files to return; must not be negative.</param>
        /// <returns>Full paths of at most <paramref name="limit"/> distinct files.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is negative.</exception>
        public static List<string> GetRandomizedFiles(string directory, int limit)
        {
            if (limit < 0)
            {
                throw new ArgumentOutOfRangeException("limit", "Limit should not be negative");
            }

            string[] files = Directory.GetFiles(directory);
            if (files.Length <= limit)
            {
                return new List<string>(files);
            }

            // There are more files than requested, so pick a random subset:
            // shuffle the indices and take the first 'limit' of them.
            int[] permutation = GenerateRandomPermutation(files.Length);
            List<string> result = new List<string>(limit);
            for (int i = 0; i < limit; ++i)
            {
                result.Add(files[permutation[i]]);
            }

            return result;
        }

        /// <summary>
        /// Builds a random permutation of the integers 0..count-1 by shuffling an
        /// identity array in place, swapping from the last index downwards.
        /// </summary>
        private static int[] GenerateRandomPermutation(int count)
        {
            int[] array = new int[count];
            for (int i = 0; i < count; ++i)
            {
                array[i] = i;
            }

            // http://forums.msdn.microsoft.com/en-US/csharpgeneral/thread/8b489948-f1b5-46d0-8bc5-bd94c418e41d/
            lock (_randomLock) // Random instances are not thread-safe.
            {
                int n = array.Length; // The number of items left to shuffle (loop invariant).
                while (n > 1)
                {
                    int k = _random.Next(n); // 0 <= k < n.
                    n--;                     // n is now the last pertinent index.
                    int temp = array[n];     // swap array[n] with array[k] (does nothing if k == n).
                    array[n] = array[k];
                    array[k] = temp;
                }
            }

            return array;
        }
    }

    /// <summary>
    /// Enumerates directory entries matching a search pattern via FindFirstFile/FindNextFile.
    /// Taken from http://blogs.msdn.com/yvesdolc/archive/2005/08/06/448673.aspx and translated to C#
    /// </summary>
    public class FilesEnumerator : IEnumerator<FoundFileData>
    {
        #region Interop imports

        private const int ERROR_FILE_NOT_FOUND = 2;
        private const int ERROR_NO_MORE_FILES = 18;

        // Returns SafeFindFileHandle so the handle is released with FindClose().
        [DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Auto)]
        private static extern SafeFindFileHandle FindFirstFile(string lpFileName, out WIN32_FIND_DATA lpFindFileData);

        [DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Auto)]
        private static extern bool FindNextFile(SafeHandle hFindFile, out WIN32_FIND_DATA lpFindFileData);

        #endregion

        #region Data Members

        private readonly string _fileName;
        private SafeFindFileHandle _findHandle; // null until the first MoveNext() (and again after Reset())
        private WIN32_FIND_DATA _win32FindData;

        #endregion

        /// <summary>
        /// Creates an enumerator for the given search pattern (e.g. <c>C:\dir\*.*</c>).
        /// No native call is made until <see cref="MoveNext"/> is called.
        /// </summary>
        /// <param name="fileName">Path with optional wildcards, passed as-is to FindFirstFile.</param>
        public FilesEnumerator(string fileName)
        {
            _fileName = fileName;
            _findHandle = null;
            _win32FindData = new WIN32_FIND_DATA();
        }

        #region IEnumerator<FoundFileData> Members

        /// <summary>
        /// A snapshot of the entry the enumerator is positioned on.
        /// </summary>
        /// <exception cref="InvalidOperationException">MoveNext() has not been called yet.</exception>
        public FoundFileData Current
        {
            get
            {
                if (_findHandle == null)
                {
                    throw new InvalidOperationException("MoveNext() must be called first");
                }

                return new FoundFileData(ref _win32FindData);
            }
        }

        object IEnumerator.Current
        {
            get { return Current; }
        }

        /// <summary>
        /// Advances to the next matching entry, starting the search on the first call.
        /// </summary>
        /// <returns>false when nothing matched or no entries are left; true otherwise.</returns>
        /// <exception cref="Win32Exception">The native call failed for any other reason.</exception>
        public bool MoveNext()
        {
            int lastError;

            if (_findHandle == null)
            {
                _findHandle = FindFirstFile(_fileName, out _win32FindData);
                if (!_findHandle.IsInvalid)
                {
                    return true;
                }

                lastError = Marshal.GetLastWin32Error();
                if (lastError == ERROR_FILE_NOT_FOUND)
                {
                    return false;
                }

                throw new Win32Exception(lastError);
            }

            // Nothing matched earlier, or the enumerator was disposed: stay at the end
            // instead of handing a dead handle to FindNextFile.
            if (_findHandle.IsInvalid || _findHandle.IsClosed)
            {
                return false;
            }

            if (FindNextFile(_findHandle, out _win32FindData))
            {
                return true;
            }

            lastError = Marshal.GetLastWin32Error();
            if (lastError == ERROR_NO_MORE_FILES)
            {
                return false;
            }

            throw new Win32Exception(lastError);
        }

        /// <summary>
        /// Closes the current search (if any) so the next MoveNext() starts over.
        /// </summary>
        public void Reset()
        {
            if (_findHandle != null)
            {
                _findHandle.Dispose();
                _findHandle = null;
            }

            _win32FindData = new WIN32_FIND_DATA();
        }

        /// <summary>
        /// Releases the native find handle. Safe to call before MoveNext() and more than once.
        /// </summary>
        public void Dispose()
        {
            if (_findHandle != null)
            {
                _findHandle.Dispose();
            }
        }

        #endregion
    }

    /// <summary>
    /// Enumerable wrapper that creates a <see cref="FilesEnumerator"/> for a search pattern,
    /// so the search can be used in a foreach loop.
    /// </summary>
    public class FilesFinder : IEnumerable<FoundFileData>
    {
        readonly string _fileName;

        /// <param name="fileName">Search pattern handed to each new <see cref="FilesEnumerator"/>.</param>
        public FilesFinder(string fileName)
        {
            _fileName = fileName;
        }

        public IEnumerator<FoundFileData> GetEnumerator()
        {
            return new FilesEnumerator(_fileName);
        }

        IEnumerator IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }
    }

    /// <summary>
    /// Managed copy of one <see cref="WIN32_FIND_DATA"/> record.
    /// </summary>
    public class FoundFileData
    {
        public string AlternateFileName;
        public FileAttributes Attributes;
        public DateTime CreationTime;
        public string FileName;
        public DateTime LastAccessTime;
        public DateTime LastWriteTime;
        public UInt64 Size;

        internal FoundFileData(ref WIN32_FIND_DATA win32FindData)
        {
            Attributes = (FileAttributes)win32FindData.dwFileAttributes;
            CreationTime = ToDateTime(win32FindData.ftCreationTime);
            LastAccessTime = ToDateTime(win32FindData.ftLastAccessTime);
            LastWriteTime = ToDateTime(win32FindData.ftLastWriteTime);
            Size = ((UInt64)win32FindData.nFileSizeHigh << 32) + win32FindData.nFileSizeLow;
            FileName = win32FindData.cFileName;
            AlternateFileName = win32FindData.cAlternateFileName;
        }

        /// <summary>
        /// Combines the two 32-bit halves of a FILETIME into a DateTime (via DateTime.FromFileTime).
        /// </summary>
        private static DateTime ToDateTime(FILETIME fileTime)
        {
            // The FILETIME halves are declared as signed ints. Reinterpret each as
            // unsigned before widening; otherwise a low half with bit 31 set
            // sign-extends and corrupts the upper 32 bits of the result.
            long high = (long)unchecked((uint)fileTime.dwHighDateTime) << 32;
            long low = unchecked((uint)fileTime.dwLowDateTime);
            return DateTime.FromFileTime(high | low);
        }
    }

    /// <summary>
    /// Safely wraps handles that need to be closed via FindClose() WIN32 method (obtained by FindFirstFile())
    /// </summary>
    public class SafeFindFileHandle : SafeHandleZeroOrMinusOneIsInvalid
    {
        // Takes the raw handle: ReleaseHandle() runs once the SafeHandle is already
        // closed, so the SafeHandle itself can no longer be marshalled.
        [DllImport("kernel32.dll", SetLastError = true)]
        private static extern bool FindClose(IntPtr hFindFile);

        /// <summary>
        /// Parameterless constructor used by the interop marshaller when a P/Invoke
        /// returns this type; the resulting handle is owned.
        /// </summary>
        public SafeFindFileHandle()
            : base(true)
        {
        }

        public SafeFindFileHandle(bool ownsHandle)
            : base(ownsHandle)
        {
        }

        protected override bool ReleaseHandle()
        {
            return FindClose(handle);
        }
    }

    // The CharSet must match the CharSet of the corresponding PInvoke signature
    [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Auto)]
    public struct WIN32_FIND_DATA
    {
        public uint dwFileAttributes;
        public FILETIME ftCreationTime;
        public FILETIME ftLastAccessTime;
        public FILETIME ftLastWriteTime;
        public uint nFileSizeHigh;
        public uint nFileSizeLow;
        public uint dwReserved0;
        public uint dwReserved1;

        [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 260)]
        public string cFileName;

        [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 14)]
        public string cAlternateFileName;
    }
}