ALib C++ Framework
by
Library Version: 2511 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
fscanner.inl
Go to the documentation of this file.
1//==================================================================================================
2/// \file
3/// This header-file is part of module \alib_files of the \aliblong.
4///
5/// \emoji :copyright: 2013-2025 A-Worx GmbH, Germany.
6/// Published under #"mainpage_license".
7//==================================================================================================
8ALIB_EXPORT namespace alib { namespace files {
9
10#if ALIB_DEBUG
11/// The format string used with verbose logging to domain <c>/ALIB/FILES/SCAN</c> by the namespace
12/// function #"ScanFiles(FTree&)".<br>
13/// Defaults to <c>" {:ta h{2,r} on{10,r} gn{10,r} s(IEC){10,r} dm qqq nf l}"</c>
15#endif
16
17
18/// Input parameters to function #"ScanFiles(FTree&)".
20{
21 /// Options for processing symbolic links.
22 enum class SymbolicLinks
23 {
24 DONT_RESOLVE = 0, ///< Demands \b not to resolve symbolic links in any way.
25 RESOLVE_BUT_DONT_FOLLOW = 1, ///< Demands to read symbolic links, but not follow linked directories.
26 ///< FInfo dates, sizes, and access rights are set according to
27 ///< the link target.
28 RECURSIVE = 2, ///< Read symbolic links and in case they are targeting a
29 ///< directory, recurse into, if this directory meets the
30 ///< other constraints associated with the current scan.
31 };
32
33 /// Denotes 'infinite' recursion if set to field #"MaxDepth".
34 static constexpr unsigned InfiniteRecursion = (std::numeric_limits<unsigned>::max)();
35
36 /// The path to be scanned.
38
39 /// Denotes how symbolic links are treated.
41
42 /// The maximum recursion depth. Defaults to #"InfiniteRecursion".
44
45 /// If \c true, the default, scanning does not stop recursion on directories which represent
46 /// a mounted filesystem. If \c false, the search is restricted to the device that #"StartPath"
47 /// resides in.
48 bool CrossFileSystems = true;
49
50 /// If \c false (the default), scanning aborts if \e 'artificial' filesystems are found.
51 /// Artificial filesystems under GNU/Linux, are for example:
52 /// <c>/proc</c>, <c>/dev</c>, <c>/run</c>, <c>/sys</c>, and <c>/temp</c>.
53 bool IncludeArtificialFS = false;
54
55 /// If \c false, empty directories remain in the result tree. Otherwise, they are deleted
56 /// and do not appear in the tree.
58
59 /// If set (not containing \c nullptr), files are passed to this filter and removed if \c false
60 /// is returned.<br>
61 /// The term "files" here means all sorts of files except Directories.
62 /// Directories are either real directories, or in case the field #"LinkTreatment" is set to
63 /// #"SymbolicLinks::RECURSIVE", symbolic links that
64 /// target a directory.
65 ///
66 /// \see Optional filters #"DirectoryFilterPreRecursion" and #"DirectoryFilterPostRecursion".
68
69 /// If set (not containing \c nullptr), this filter is invoked \b after a recursive scan of
70 /// a directory. If \c false is returned, the scanned subtree is deleted, but the (empty)
71 /// directory remains in the result list, if field #"RemoveEmptyDirectories" evaluates to
72 /// \c false.<br>
73 /// Note that in case field #"LinkTreatment" is set to
74 /// #"SymbolicLinks::RECURSIVE", this filter
75 /// is also applied to symbolic links, which are readable, not broken, and target a directory.
76 ///
77 /// \note
78 /// Directories (and symbolic links to directories) are first recursively scanned before this
79 /// filter is applied. On deletion, of course the whole scanned subtree is deleted.
80 /// This allows filtering directories, depending on information available only after
81 /// scanning, hence by the numbers retrieved with #"FInfo::Sums;*".
82 /// To increase performance and filter directories \e before their recursive scan,
83 /// alternative field #"DirectoryFilterPreRecursion" is to be used.
84 ///
85 /// \see Optional filters #"DirectoryFilterPreRecursion" and #"FileFilter".
86 ///
88
89 /// Same as #DirectoryFilterPostRecursion but is used \b before a recursive scan of
90 /// a directory. Consequently, this filter leads to much higher scan performance than the
91 /// alternative version, because huge branches of the file system might be omitted during scan.
92 /// However, the numbers retrieved with #"FInfo::Sums;*" will all indicate
93 /// \c 0, because no information is retrieved.<br>
94 /// If a directory is "pruned" due to this filter, the entry still occurs in the \b %FTree,
95 /// unless field #RemoveEmptyDirectories evaluates to \c true.<br>
96 ///
97 /// \see Optional filters #DirectoryFilterPostRecursion and #FileFilter.
98 ///
100
101 /// Constructor accepting all features.
102 /// @param startPath Stored in field #StartPath.
103 /// @param linkTreatment Stored in field #LinkTreatment. Defaults to \b SymbolicLinks::RECURSIVE.
104 /// @param maxDepth Stored in field #MaxDepth. Defaults to #InfiniteRecursion.
105 /// @param crossFileSystems Stored in field #CrossFileSystems. Defaults to \c true.
106 /// @param includeArtificialFS Stored in field #IncludeArtificialFS. Defaults to \c false.
109 unsigned maxDepth = InfiniteRecursion,
110 bool crossFileSystems = true,
111 bool includeArtificialFS= false )
112 : StartPath (startPath )
113 , LinkTreatment (linkTreatment )
114 , MaxDepth (maxDepth )
115 , CrossFileSystems (crossFileSystems )
116 , IncludeArtificialFS(includeArtificialFS) {}
117
118}; // struct ScanParameters
119
120/// A simple vector containing nodes of an #"FTree". Such nodes are collected during calls
121/// of the function #"ScanFiles". One call (aka during the recursive scan of one path) can result
122/// in more than one entry in this list, because with resolving symbolic links new isolated
123/// siblings can occur.<br>
124/// The single new method of this type is #".Add", which checks if the given new start-path
125/// is superseding others or is superseded itself by an existing path. In that case the
126/// superseded path is deleted.
127///
128/// Despite the little effort that \alib takes with the provision of these mechanics, often the
129/// analysis of, or a loop through this path list is not necessary. This is because most
130/// using code would just scan one or more paths and then #"StringTreeIterator;loop through" just
131/// all resulting directory and file nodes that have been inserted into the tree.
132/// Consequently, the function #"ScanFiles" accepts an instance of this class only optionally.
133struct CanonicalPathList : std::vector<FTree::Cursor>
134{
135 /// Adds the given node to the list, in the case it is not superseded by an already
136 /// collected node. Vice versa, existing nodes that are superseded by the given one are removed.
137 /// @param node The node to add.
139 void Add(FTree::Cursor node);
140};
141
142
143/// ### General Information ###
144/// Scans the filesystem according to the given \b ScanParameters and adds #"FInfo"
145/// entries to the given #"FTree".
146///
147/// ### ALib FTree Data Contract ###
148/// This function has a contract with the class #"FTree" that is used to store the scan results.
149/// This contract states that any file or directory found during a scan is always stored using
150/// the <em>"Real Path"</em> of the entry. This means that any symbolic link is resolved.
151/// The consequences are:
152/// - %Files and directories which represent a symbolic link are always "leaf nodes".
153/// (They never contain child nodes.) However, their symlink target path is attached twice
154/// to the entry:
155/// 1. The original link information given, which often uses relative path addressing.
156/// 2. The absolute, <em>"Real Path"</em> of the target, which has a corresponding result entry
157/// in the given \b %FTree.
158/// - If a using software wants to use symbolic paths, for example, to present them to the end
159/// user, such paths have to be assembled by the user's code in own responsibility.
160/// All information for doing this is provided in the resulting tree object.
161/// - Doubly linked target files and directories are never a problem for this scanner. Each
162/// file is scanned only once. This especially prevents all sorts of problems that would otherwise
163/// occur with cyclic symbolic links.
164/// - Due to this, even the given start path of a search might not be found as a result
165/// in the given \b %FTree, because also start paths are converted to a <em>Real Path</em>.
166/// - The scan result may contain more than one resulting path. This happens if a symbolic link
167/// targets a file or directory not recursively included in the start path.
168/// The resulting <em>"Real Path"</em> of the given start path is, however, always the first
169/// result added.
170///
171/// The latter is reflected with (optional) parameter \p{resultPaths} of this function, which is
172/// of type #"CanonicalPathList".
173///
174/// \note
175/// Because the class #"FTree" is based on class #"StringTree", using code
176/// is enabled to break this contract by adding entries below symbolic links.
177/// Other entities of this \alibmod_nl will not break this contract.
178///
179/// ### Rescanning of Entries ###
180/// Existing entries in the given \p{tree} are not overwritten. They might be scanned with "higher"
181/// #"FInfo::ScanStates;*" values, depending on given \p{parameters} and how they had been
182/// scanned before. If the same "level" of scanning is provided, existing entries will not be
183/// scanned again. If a rescan of a certain path is wanted, then the target entry of that path has
184/// to be deleted before invoking this function. Due to the implementation of class FTree, repeated
185/// delete and scan operations will not cause any heap-memory allocations or deallocations.
186///
187/// ### Platform-Dependent Code Selection ###
188/// File scanning is a platform-dependent task and hence \b ALib uses one of two different
189/// implementations:
190/// 1. A posix version for posix compatible OSes,
191/// 2. A version that relies on <c>C++ std::filesystem</c>.
192///
193/// The fallback version using <c>std::filesystem</c> has the following restrictions:
194/// - The only time attribute available is the #"FInfo::MDate;modification time" of
195/// an entry. The fields #"FInfo::BDate", #"FInfo::CDate", and #"FInfo::ADate" are always set
196/// to the same as the modification time, even on filesystems that support the other values.
197/// - The file time of symbolic links is \b always that of the target file. The C++ standard has
198/// no possibility to access the link's time itself.
199/// - The file time of broken symbolic links is set to the current time (time of scanning).
200/// - The size that directories occupy on a disk cannot be determined.
201/// Directory entries always report size <c>0</c>.
202/// - The target of a symbolic link which points to a non-accessible directory, cannot be resolved
203/// to a "real" (aka canonical) path, even if all other path components before were accessible.
204/// (This is true for the implementation of the standard library under GNU/Linux and Clang
205/// compiler at the time of writing this, 2024/02.)
206/// - The flag #"ScanParameters::CrossFileSystems;*" is ignored. Crossing Filesystems cannot
207/// be detected using purely the standard library.
208/// - A file's owner and owning group are not determined. Instead, #"FInfo::UnknownID;*" is set for
209/// both.
210/// - The scanning process is half as fast as in the Posix version. The reason for this is probably
211/// the internal allocation and deallocation of many quite volatile string objects in the C++
212/// standard library.
213/// Well, but it is still fast though!
214///
215/// \note As of today, using this module under WindowsOS will fall back to the
216/// <em>C++ std::filesystem</em> version. It may be that a future version will provide a
217/// native implementation for this target system. Volunteers from the community are welcome to
218/// contribute.
219///
220/// @param tree The tree to fill.
221/// @param parameters The input parameters to determine the scan process.
222/// @param[out] resultPaths An optional container to store the result paths of a scan.
223/// If \c nullptr is given, the result paths are not collected. See the
224/// #"CanonicalPathList;types documentation" for further information.
225/// @param[out] remainingStart An optional path string. If given, on failure, it will receive the
226/// remainder of the path given with #"ScanParameters::StartPath;2"
227/// starting with the first directory or file that could not be resolved
228/// or accessed.
229///
230/// @return The scan state code of the tree node of the first resulting path, hence of the node
231/// referred to by the given #"ScanParameters::StartPath;2".<br>
232/// On error, i.e. if the start path was invalid, not accessible, a broken link, a circular
233/// link, or other failures, #"ScanStates::NOT_EXISTENT" is returned.
236 ScanParameters& parameters,
237 CanonicalPathList* resultPaths = nullptr,
238 Path* remainingStart = nullptr );
239
240/// Analyses the given \p{sourcePath} and converts it to its canonical version.
241/// This is similar to what the posix function <c>realpath()</c> and C++
242/// <c>std::filesystem::canonical</c> do.<br>
243/// This version, in addition, creates corresponding nodes in the #"FTree" (passed indirectly with
244/// the parameter \p{node}). Besides removing <c>"."</c> and <c>".."</c> entries, symbolic links
245/// are not only resolved, but the nodes they are targeting receive information about the link
246/// that targeted them. This information is set with the method #"File::SetSymbolicParent(File)".
247/// With that, the path of directories or files that are children of such targeted node, can
248/// re-establish the file-path as originally specified. This is done with the method
249/// #"File::AssembleSymbolicPath".
250///
251/// \note
252/// This function is mainly used by the function #"ScanFiles" and a direct use is seldom needed.
253/// @param[in,out] sourcePath The path to scan. This might contain <c>"."</c> and <c>".."</c>
254/// directories, as well as symbolic links.
255/// When the method exits successfully, this path is empty.
256/// Otherwise, this path-string contains the remaining
257/// path, starting with the name of the file or directory, that could
258/// not be found, accessed, or otherwise be resolved.
259/// @param[in,out] node The starting node. In case the parameter \p{sourcePath} is an
260/// absolute path, this node is changed to the
261/// #"TCursor::GoToRoot;root folder" of the #"FTree".<br>
262/// When the method exits successfully, this cursor targets the
263/// file that the source path resolved to.<br>
264/// In case of failure, this cursor becomes
265/// #"TCursor::IsInvalid;invalid".
266/// @param[in,out] pathToNode This path has to point to the given \p{node} when the method is
267/// called. When the method returns, it points to the then moved
268/// \p{node}.
269/// On exit, this hence contains the path to the modified \p{node},
270/// which is not scanned, yet.
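271/// @param[in,out] resultPaths An optional list of type #"CanonicalPathList". Defaults to \c nullptr. (Todo: document how this function uses the list.)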
272/// @return The scan state of the target node. This should usually be #"FInfo::ScanStates::STATS"
273/// or #"FInfo::ScanStates::RESOLVED". In case of failure,
274/// #"FInfo::ScanStates::NOT_EXISTENT" or other scan state values that indicate failure
275/// are returned.
277 FTree::Cursor& node,
278 Path& pathToNode,
279 CanonicalPathList* resultPaths= nullptr );
280
281} // namespace alib[::files]
282
283
284/// Type alias in namespace \b alib.
286
287/// Type alias in namespace \b alib.
289
290} // namespace [alib]
#define ALIB_DLL
Definition alib.inl:573
#define ALIB_EXPORT
Definition alib.inl:562
ScanStates
Per-entry information about how a node was scanned.
Definition finfo.inl:125
String DBG_FILES_SCAN_VERBOSE_LOG_FORMAT
FInfo::ScanStates MakeCanonical(Path &sourcePath, FTree::Cursor &node, Path &pathToNode, CanonicalPathList *resultPaths=nullptr)
FInfo::ScanStates ScanFiles(FTree &tree, ScanParameters &parameters, CanonicalPathList *resultPaths=nullptr, Path *remainingStart=nullptr)
std::shared_ptr< FFilter > SPFileFilter
A shared pointer to a filter.
Definition ffilter.inl:44
strings::TString< PathCharType > PathString
The string-type used with this ALib Module.
Definition path.inl:33
files::CanonicalPathList CanonicalPathList
Type alias in namespace alib.
Definition fscanner.inl:288
strings::TString< character > String
Type alias in namespace alib.
Definition string.inl:2172
system::Path Path
Type alias in namespace alib.
Definition path.inl:375
files::ScanParameters ScanParameters
Type alias in namespace alib.
Definition fscanner.inl:285
void Add(FTree::Cursor node)
Input parameters to function #"ScanFiles(FTree&)".
Definition fscanner.inl:20
unsigned MaxDepth
The maximum recursion depth. Defaults to #"InfiniteRecursion".
Definition fscanner.inl:43
SPFileFilter DirectoryFilterPreRecursion
Definition fscanner.inl:99
static constexpr unsigned InfiniteRecursion
Denotes 'infinite' recursion if set to field #"MaxDepth".
Definition fscanner.inl:34
SymbolicLinks LinkTreatment
Denotes how symbolic links are treated.
Definition fscanner.inl:40
SymbolicLinks
Options for processing symbolic links.
Definition fscanner.inl:23
@ DONT_RESOLVE
Demands not to resolve symbolic links in any way.
Definition fscanner.inl:24
Path StartPath
The path to be scanned.
Definition fscanner.inl:37
SPFileFilter DirectoryFilterPostRecursion
Definition fscanner.inl:87
ScanParameters(const system::PathString &startPath, SymbolicLinks linkTreatment=SymbolicLinks::RECURSIVE, unsigned maxDepth=InfiniteRecursion, bool crossFileSystems=true, bool includeArtificialFS=false)
Definition fscanner.inl:107