llvm.org GIT mirror llvm / daa4acc
Support: Have directory_iterator::status() return FindFirstFileEx/FindNextFile results on Windows. This allows clients to avoid an unnecessary fs::status() call on each directory entry. Because the information returned by FindFirstFileEx is a subset of the information returned by a regular status() call, I needed to extract a base class from file_status that contains only that information. On my machine, this reduces the time required to enumerate a ThinLTO cache directory containing 520k files from almost 4 minutes to less than 2 seconds. Differential Revision: https://reviews.llvm.org/D38716 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315378 91177308-0d34-0410-b5e6-96231b3b80d8 Peter Collingbourne 1 year, 11 months ago
6 changed file(s) with 157 addition(s) and 116 deletion(s). Raw diff Collapse all Expand all
140140 uint64_t getFile() const { return File; }
141141 };
142142
143 /// file_status - Represents the result of a call to stat and friends. It has
144 /// a platform-specific member to store the result.
145 class file_status
146 {
147 friend bool equivalent(file_status A, file_status B);
148
143 /// Represents the result of a call to directory_entry::status(). This is a
144 /// subset of the information returned by a regular sys::fs::status() call, and
145 /// represents the information provided by Windows FindFirstFileEx/FindNextFile.
146 class basic_file_status {
147 protected:
149148 #if defined(LLVM_ON_UNIX)
150 dev_t fs_st_dev = 0;
151 nlink_t fs_st_nlinks = 0;
152 ino_t fs_st_ino = 0;
153149 time_t fs_st_atime = 0;
154150 time_t fs_st_mtime = 0;
155151 uid_t fs_st_uid = 0;
156152 gid_t fs_st_gid = 0;
157153 off_t fs_st_size = 0;
158154 #elif defined (LLVM_ON_WIN32)
159 uint32_t NumLinks = 0;
160155 uint32_t LastAccessedTimeHigh = 0;
161156 uint32_t LastAccessedTimeLow = 0;
162157 uint32_t LastWriteTimeHigh = 0;
163158 uint32_t LastWriteTimeLow = 0;
164 uint32_t VolumeSerialNumber = 0;
165159 uint32_t FileSizeHigh = 0;
166160 uint32_t FileSizeLow = 0;
167 uint32_t FileIndexHigh = 0;
168 uint32_t FileIndexLow = 0;
169161 #endif
170162 file_type Type = file_type::status_error;
171163 perms Perms = perms_not_known;
172164
173165 public:
166 basic_file_status() = default;
167
168 explicit basic_file_status(file_type Type) : Type(Type) {}
169
174170 #if defined(LLVM_ON_UNIX)
175 file_status() = default;
176
177 file_status(file_type Type) : Type(Type) {}
178
179 file_status(file_type Type, perms Perms, dev_t Dev, nlink_t Links, ino_t Ino,
180 time_t ATime, time_t MTime, uid_t UID, gid_t GID, off_t Size)
181 : fs_st_dev(Dev), fs_st_nlinks(Links), fs_st_ino(Ino), fs_st_atime(ATime),
182 fs_st_mtime(MTime), fs_st_uid(UID), fs_st_gid(GID), fs_st_size(Size),
183 Type(Type), Perms(Perms) {}
184 #elif defined(LLVM_ON_WIN32)
185 file_status() = default;
186
187 file_status(file_type Type) : Type(Type) {}
188
189 file_status(file_type Type, perms Perms, uint32_t LinkCount,
190 uint32_t LastAccessTimeHigh, uint32_t LastAccessTimeLow,
191 uint32_t LastWriteTimeHigh, uint32_t LastWriteTimeLow,
192 uint32_t VolumeSerialNumber, uint32_t FileSizeHigh,
193 uint32_t FileSizeLow, uint32_t FileIndexHigh,
194 uint32_t FileIndexLow)
195 : NumLinks(LinkCount), LastAccessedTimeHigh(LastAccessTimeHigh),
171 basic_file_status(file_type Type, perms Perms, time_t ATime, time_t MTime,
172 uid_t UID, gid_t GID, off_t Size)
173 : fs_st_atime(ATime), fs_st_mtime(MTime), fs_st_uid(UID), fs_st_gid(GID),
174 fs_st_size(Size), Type(Type), Perms(Perms) {}
175 #elif defined(LLVM_ON_WIN32)
176 basic_file_status(file_type Type, perms Perms, uint32_t LastAccessTimeHigh,
177 uint32_t LastAccessTimeLow, uint32_t LastWriteTimeHigh,
178 uint32_t LastWriteTimeLow, uint32_t FileSizeHigh,
179 uint32_t FileSizeLow)
180 : LastAccessedTimeHigh(LastAccessTimeHigh),
196181 LastAccessedTimeLow(LastAccessTimeLow),
197182 LastWriteTimeHigh(LastWriteTimeHigh),
198 LastWriteTimeLow(LastWriteTimeLow),
199 VolumeSerialNumber(VolumeSerialNumber), FileSizeHigh(FileSizeHigh),
200 FileSizeLow(FileSizeLow), FileIndexHigh(FileIndexHigh),
201 FileIndexLow(FileIndexLow), Type(Type), Perms(Perms) {}
183 LastWriteTimeLow(LastWriteTimeLow), FileSizeHigh(FileSizeHigh),
184 FileSizeLow(FileSizeLow), Type(Type), Perms(Perms) {}
202185 #endif
203186
204187 // getters
206189 perms permissions() const { return Perms; }
207190 TimePoint<> getLastAccessedTime() const;
208191 TimePoint<> getLastModificationTime() const;
209 UniqueID getUniqueID() const;
210 uint32_t getLinkCount() const;
211192
212193 #if defined(LLVM_ON_UNIX)
213194 uint32_t getUser() const { return fs_st_uid; }
232213 void permissions(perms p) { Perms = p; }
233214 };
234215
216 /// Represents the result of a call to sys::fs::status().
217 class file_status : public basic_file_status {
218 friend bool equivalent(file_status A, file_status B);
219
220 #if defined(LLVM_ON_UNIX)
221 dev_t fs_st_dev = 0;
222 nlink_t fs_st_nlinks = 0;
223 ino_t fs_st_ino = 0;
224 #elif defined (LLVM_ON_WIN32)
225 uint32_t NumLinks = 0;
226 uint32_t VolumeSerialNumber = 0;
227 uint32_t FileIndexHigh = 0;
228 uint32_t FileIndexLow = 0;
229 #endif
230
231 public:
232 file_status() = default;
233
234 explicit file_status(file_type Type) : basic_file_status(Type) {}
235
236 #if defined(LLVM_ON_UNIX)
237 file_status(file_type Type, perms Perms, dev_t Dev, nlink_t Links, ino_t Ino,
238 time_t ATime, time_t MTime, uid_t UID, gid_t GID, off_t Size)
239 : basic_file_status(Type, Perms, ATime, MTime, UID, GID, Size),
240 fs_st_dev(Dev), fs_st_nlinks(Links), fs_st_ino(Ino) {}
241 #elif defined(LLVM_ON_WIN32)
242 file_status(file_type Type, perms Perms, uint32_t LinkCount,
243 uint32_t LastAccessTimeHigh, uint32_t LastAccessTimeLow,
244 uint32_t LastWriteTimeHigh, uint32_t LastWriteTimeLow,
245 uint32_t VolumeSerialNumber, uint32_t FileSizeHigh,
246 uint32_t FileSizeLow, uint32_t FileIndexHigh,
247 uint32_t FileIndexLow)
248 : basic_file_status(Type, Perms, LastAccessTimeHigh, LastAccessTimeLow,
249 LastWriteTimeHigh, LastWriteTimeLow, FileSizeHigh,
250 FileSizeLow),
251 NumLinks(LinkCount), VolumeSerialNumber(VolumeSerialNumber),
252 FileIndexHigh(FileIndexHigh), FileIndexLow(FileIndexLow) {}
253 #endif
254
255 UniqueID getUniqueID() const;
256 uint32_t getLinkCount() const;
257 };
258
235259 /// @}
236260 /// @name Physical Operators
237261 /// @{
382406
383407 /// @brief Does file exist?
384408 ///
385 /// @param status A file_status previously returned from stat.
409 /// @param status A basic_file_status previously returned from stat.
386410 /// @returns True if the file represented by status exists, false if it does
387411 /// not.
388 bool exists(file_status status);
412 bool exists(const basic_file_status &status);
389413
390414 enum class AccessMode { Exist, Write, Execute };
391415
484508
485509 /// @brief Does status represent a directory?
486510 ///
487 /// @param status A file_status previously returned from status.
511 /// @param status A basic_file_status previously returned from status.
488512 /// @returns status.type() == file_type::directory_file.
489 bool is_directory(file_status status);
513 bool is_directory(const basic_file_status &status);
490514
491515 /// @brief Is path a directory?
492516 ///
506530
507531 /// @brief Does status represent a regular file?
508532 ///
509 /// @param status A file_status previously returned from status.
533 /// @param status A basic_file_status previously returned from status.
510534 /// @returns status_known(status) && status.type() == file_type::regular_file.
511 bool is_regular_file(file_status status);
535 bool is_regular_file(const basic_file_status &status);
512536
513537 /// @brief Is path a regular file?
514538 ///
530554
531555 /// @brief Does status represent a symlink file?
532556 ///
533 /// @param status A file_status previously returned from status.
557 /// @param status A basic_file_status previously returned from status.
534558 /// @returns status_known(status) && status.type() == file_type::symlink_file.
535 bool is_symlink_file(file_status status);
559 bool is_symlink_file(const basic_file_status &status);
536560
537561 /// @brief Is path a symlink file?
538562 ///
555579 /// @brief Does this status represent something that exists but is not a
556580 /// directory or regular file?
557581 ///
558 /// @param status A file_status previously returned from status.
582 /// @param status A basic_file_status previously returned from status.
559583 /// @returns exists(status) && !is_regular_file(status) && !is_directory(status)
560 bool is_other(file_status status);
584 bool is_other(const basic_file_status &status);
561585
562586 /// @brief Is path something that exists but is not a directory,
563587 /// regular file, or symlink?
630654 ///
631655 /// @param s Input file status.
632656 /// @returns True if s.type() != file_type::status_error.
633 bool status_known(file_status s);
657 bool status_known(const basic_file_status &s);
634658
635659 /// @brief Is status available?
636660 ///
792816 class directory_entry {
793817 std::string Path;
794818 bool FollowSymlinks;
795 mutable file_status Status;
819 basic_file_status Status;
796820
797821 public:
798822 explicit directory_entry(const Twine &path, bool follow_symlinks = true,
799 file_status st = file_status())
823 basic_file_status st = basic_file_status())
800824 : Path(path.str()), FollowSymlinks(follow_symlinks), Status(st) {}
801825
802826 directory_entry() = default;
803827
804 void assign(const Twine &path, file_status st = file_status()) {
828 void assign(const Twine &path, basic_file_status st = basic_file_status()) {
805829 Path = path.str();
806830 Status = st;
807831 }
808832
809 void replace_filename(const Twine &filename, file_status st = file_status());
833 void replace_filename(const Twine &filename,
834 basic_file_status st = basic_file_status());
810835
811836 const std::string &path() const { return Path; }
812 std::error_code status(file_status &result) const;
837 ErrorOr status() const;
813838
814839 bool operator==(const directory_entry& rhs) const { return Path == rhs.Path; }
815840 bool operator!=(const directory_entry& rhs) const { return !(*this == rhs); }
928953 if (State->HasNoPushRequest)
929954 State->HasNoPushRequest = false;
930955 else {
931 file_status st;
932 if ((ec = State->Stack.top()->status(st))) return *this;
933 if (is_directory(st)) {
956 ErrorOr st = State->Stack.top()->status();
957 if (!st) return *this;
958 if (is_directory(*st)) {
934959 State->Stack.push(directory_iterator(*State->Stack.top(), ec, Follow));
935960 if (ec) return *this;
936961 if (State->Stack.top() != end_itr) {
181181 bool ShouldComputeSize =
182182 (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0);
183183
184 // Keep track of space
184 // Keep track of space. Needs to be kept ordered by size for determinism.
185185 std::set> FileSizes;
186186 uint64_t TotalSize = 0;
187 // Helper to add a path to the set of files to consider for size-based
188 // pruning, sorted by size.
189 auto AddToFileListForSizePruning =
190 [&](StringRef Path) {
191 if (!ShouldComputeSize)
192 return;
193 TotalSize += FileStatus.getSize();
194 FileSizes.insert(
195 std::make_pair(FileStatus.getSize(), std::string(Path)));
196 };
197187
198188 // Walk the entire directory cache, looking for unused files.
199189 std::error_code EC;
211201
212202 // Look at this file. If we can't stat it, there's nothing interesting
213203 // there.
214 if (sys::fs::status(File->path(), FileStatus)) {
204 ErrorOr StatusOrErr = File->status();
205 if (!StatusOrErr) {
215206 DEBUG(dbgs() << "Ignore " << File->path() << " (can't stat)\n");
216207 continue;
217208 }
218209
219210 // If the file hasn't been used recently enough, delete it
220 const auto FileAccessTime = FileStatus.getLastAccessedTime();
211 const auto FileAccessTime = StatusOrErr->getLastAccessedTime();
221212 auto FileAge = CurrentTime - FileAccessTime;
222213 if (FileAge > Policy.Expiration) {
223214 DEBUG(dbgs() << "Remove " << File->path() << " ("
227218 }
228219
229220 // Leave it here for now, but add it to the list of size-based pruning.
230 AddToFileListForSizePruning(File->path());
221 if (!ShouldComputeSize)
222 continue;
223 TotalSize += StatusOrErr->getSize();
224 FileSizes.insert({StatusOrErr->getSize(), std::string(File->path())});
231225 }
232226
233227 // Prune for size now if needed
951951 return Result;
952952 }
953953
954 bool exists(file_status status) {
954 bool exists(const basic_file_status &status) {
955955 return status_known(status) && status.type() != file_type::file_not_found;
956956 }
957957
958 bool status_known(file_status s) {
958 bool status_known(const basic_file_status &s) {
959959 return s.type() != file_type::status_error;
960960 }
961961
966966 return st.type();
967967 }
968968
969 bool is_directory(file_status status) {
969 bool is_directory(const basic_file_status &status) {
970970 return status.type() == file_type::directory_file;
971971 }
972972
978978 return std::error_code();
979979 }
980980
981 bool is_regular_file(file_status status) {
981 bool is_regular_file(const basic_file_status &status) {
982982 return status.type() == file_type::regular_file;
983983 }
984984
990990 return std::error_code();
991991 }
992992
993 bool is_symlink_file(file_status status) {
993 bool is_symlink_file(const basic_file_status &status) {
994994 return status.type() == file_type::symlink_file;
995995 }
996996
10021002 return std::error_code();
10031003 }
10041004
1005 bool is_other(file_status status) {
1005 bool is_other(const basic_file_status &status) {
10061006 return exists(status) &&
10071007 !is_regular_file(status) &&
10081008 !is_directory(status);
10161016 return std::error_code();
10171017 }
10181018
1019 void directory_entry::replace_filename(const Twine &filename, file_status st) {
1019 void directory_entry::replace_filename(const Twine &filename,
1020 basic_file_status st) {
10201021 SmallString<128> path = path::parent_path(Path);
10211022 path::append(path, filename);
10221023 Path = path.str();
10231024 Status = st;
1024 }
1025
1026 std::error_code directory_entry::status(file_status &result) const {
1027 return fs::status(Path, result, FollowSymlinks);
10281025 }
10291026
10301027 ErrorOr getPermissions(const Twine &Path) {
216216 return "";
217217 }
218218
219 TimePoint<> file_status::getLastAccessedTime() const {
219 TimePoint<> basic_file_status::getLastAccessedTime() const {
220220 return toTimePoint(fs_st_atime);
221221 }
222222
223 TimePoint<> file_status::getLastModificationTime() const {
223 TimePoint<> basic_file_status::getLastModificationTime() const {
224224 return toTimePoint(fs_st_mtime);
225225 }
226226
710710 return directory_iterator_destruct(it);
711711
712712 return std::error_code();
713 }
714
715 ErrorOr directory_entry::status() const {
716 file_status s;
717 if (auto EC = fs::status(Path, s, FollowSymlinks))
718 return EC;
719 return s;
713720 }
714721
715722 #if !defined(F_GETPATH)
808815 directory_iterator End;
809816 while (Begin != End) {
810817 auto &Item = *Begin;
811 file_status st;
812 EC = Item.status(st);
813 if (EC && !IgnoreErrors)
814 return EC;
815
816 if (is_directory(st)) {
818 ErrorOr st = Item.status();
819 if (!st && !IgnoreErrors)
820 return st.getError();
821
822 if (is_directory(*st)) {
817823 EC = remove_directories_impl(Item, IgnoreErrors);
818824 if (EC && !IgnoreErrors)
819825 return EC;
167167 return SpaceInfo;
168168 }
169169
170 TimePoint<> file_status::getLastAccessedTime() const {
170 TimePoint<> basic_file_status::getLastAccessedTime() const {
171171 FILETIME Time;
172172 Time.dwLowDateTime = LastAccessedTimeLow;
173173 Time.dwHighDateTime = LastAccessedTimeHigh;
174174 return toTimePoint(Time);
175175 }
176176
177 TimePoint<> file_status::getLastModificationTime() const {
177 TimePoint<> basic_file_status::getLastModificationTime() const {
178178 FILETIME Time;
179179 Time.dwLowDateTime = LastWriteTimeLow;
180180 Time.dwHighDateTime = LastWriteTimeHigh;
568568 return false;
569569 }
570570
571 static file_type file_type_from_attrs(DWORD Attrs) {
572 return (Attrs & FILE_ATTRIBUTE_DIRECTORY) ? file_type::directory_file
573 : file_type::regular_file;
574 }
575
576 static perms perms_from_attrs(DWORD Attrs) {
577 return (Attrs & FILE_ATTRIBUTE_READONLY) ? (all_read | all_exe) : all_all;
578 }
579
571580 static std::error_code getStatus(HANDLE FileHandle, file_status &Result) {
572581 if (FileHandle == INVALID_HANDLE_VALUE)
573582 goto handle_status_error;
596605 if (!::GetFileInformationByHandle(FileHandle, &Info))
597606 goto handle_status_error;
598607
599 {
600 file_type Type = (Info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
601 ? file_type::directory_file
602 : file_type::regular_file;
603 perms Permissions = (Info.dwFileAttributes & FILE_ATTRIBUTE_READONLY)
604 ? (all_read | all_exe)
605 : all_all;
606 Result = file_status(
607 Type, Permissions, Info.nNumberOfLinks,
608 Info.ftLastAccessTime.dwHighDateTime,
609 Info.ftLastAccessTime.dwLowDateTime,
610 Info.ftLastWriteTime.dwHighDateTime, Info.ftLastWriteTime.dwLowDateTime,
611 Info.dwVolumeSerialNumber, Info.nFileSizeHigh, Info.nFileSizeLow,
612 Info.nFileIndexHigh, Info.nFileIndexLow);
613 return std::error_code();
614 }
608 Result = file_status(
609 file_type_from_attrs(Info.dwFileAttributes),
610 perms_from_attrs(Info.dwFileAttributes), Info.nNumberOfLinks,
611 Info.ftLastAccessTime.dwHighDateTime, Info.ftLastAccessTime.dwLowDateTime,
612 Info.ftLastWriteTime.dwHighDateTime, Info.ftLastWriteTime.dwLowDateTime,
613 Info.dwVolumeSerialNumber, Info.nFileSizeHigh, Info.nFileSizeLow,
614 Info.nFileIndexHigh, Info.nFileIndexLow);
615 return std::error_code();
615616
616617 handle_status_error:
617618 DWORD LastError = ::GetLastError();
797798 return SysInfo.dwAllocationGranularity;
798799 }
799800
801 static basic_file_status status_from_find_data(WIN32_FIND_DATA *FindData) {
802 return basic_file_status(file_type_from_attrs(FindData->dwFileAttributes),
803 perms_from_attrs(FindData->dwFileAttributes),
804 FindData->ftLastAccessTime.dwHighDateTime,
805 FindData->ftLastAccessTime.dwLowDateTime,
806 FindData->ftLastWriteTime.dwHighDateTime,
807 FindData->ftLastWriteTime.dwLowDateTime,
808 FindData->nFileSizeHigh, FindData->nFileSizeLow);
809 }
810
800811 std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
801812 StringRef path,
802813 bool follow_symlinks) {
817828
818829 // Get the first directory entry.
819830 WIN32_FIND_DATAW FirstFind;
820 ScopedFindHandle FindHandle(::FindFirstFileW(c_str(path_utf16), &FirstFind));
831 ScopedFindHandle FindHandle(::FindFirstFileExW(
832 c_str(path_utf16), FindExInfoBasic, &FirstFind, FindExSearchNameMatch,
833 NULL, FIND_FIRST_EX_LARGE_FETCH));
821834 if (!FindHandle)
822835 return mapWindowsError(::GetLastError());
823836
844857 it.IterationHandle = intptr_t(FindHandle.take());
845858 SmallString<128> directory_entry_path(path);
846859 path::append(directory_entry_path, directory_entry_name_utf8);
847 it.CurrentEntry = directory_entry(directory_entry_path, follow_symlinks);
860 it.CurrentEntry = directory_entry(directory_entry_path, follow_symlinks,
861 status_from_find_data(&FirstFind));
848862
849863 return std::error_code();
850864 }
880894 directory_entry_path_utf8))
881895 return ec;
882896
883 it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8));
884 return std::error_code();
897 it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8),
898 status_from_find_data(&FindData));
899 return std::error_code();
900 }
901
902 ErrorOr directory_entry::status() const {
903 return Status;
885904 }
886905
887906 static std::error_code realPathFromHandle(HANDLE H,
868868 i != e; i.increment(ec)) {
869869 ASSERT_NO_ERROR(ec);
870870
871 fs::file_status status;
872 if (i->status(status) ==
871 ErrorOr status = i->status();
872 if (status.getError() ==
873873 std::make_error_code(std::errc::no_such_file_or_directory)) {
874874 i.no_push();
875875 continue;