AirLibrary/Indexing/Scan/
ScanFile.rs1use std::{
73 path::PathBuf,
74 time::{Duration, Instant},
75};
76
77use crate::{
78 AirError,
79 Configuration::IndexingConfig,
80 Indexing::{
81 Process::{
82 ProcessContent::{DetectEncoding, DetectLanguage, DetectMimeType},
83 ExtractSymbols::ExtractSymbols,
84 },
85 State::CreateState::{FileMetadata, SymbolInfo},
86 },
87 Result,
88};
89
90pub async fn IndexFileInternal(
102 file_path:&PathBuf,
103 config:&IndexingConfig,
104 _patterns:&[String],
105) -> Result<(FileMetadata, Vec<SymbolInfo>)> {
106 let start_time = Instant::now();
107
108 let metadata = std::fs::metadata(file_path)
110 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
111
112 let modified = metadata
114 .modified()
115 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
116
117 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
118
119 let file_size = metadata.len();
121 if file_size > config.MaxFileSizeMb as u64 * 1024 * 1024 {
122 return Err(AirError::FileSystem(format!(
123 "File size {} exceeds limit {} MB",
124 file_size, config.MaxFileSizeMb
125 )));
126 }
127
128 let content = tokio::time::timeout(Duration::from_secs(30), tokio::fs::read(file_path))
130 .await
131 .map_err(|_| AirError::FileSystem(format!("Timeout reading file: {} (30s limit)", file_path.display())))?
132 .map_err(|e| AirError::FileSystem(format!("Failed to read file: {}", e)))?;
133
134 let is_symlink = std::fs::symlink_metadata(file_path)
136 .map(|m| m.file_type().is_symlink())
137 .unwrap_or(false);
138
139 let checksum = CalculateChecksum(&content);
141
142 let encoding = DetectEncoding(&content);
144
145 let mime_type = DetectMimeType(file_path, &content);
147
148 let language = DetectLanguage(file_path);
150
151 let line_count = if mime_type.starts_with("text/") {
153 Some(content.iter().filter(|&&b| b == b'\n').count() as u32 + 1)
154 } else {
155 None
156 };
157
158 let symbols = if let Some(lang) = &language {
160 ExtractSymbols(file_path, &content, lang).await?
161 } else {
162 Vec::new()
163 };
164
165 let permissions = GetPermissionsString(&metadata);
166
167 let elapsed = start_time.elapsed();
168
169 log::trace!(
170 "[ScanFile] Indexed {} in {}ms ({} symbols)",
171 file_path.display(),
172 elapsed.as_millis(),
173 symbols.len()
174 );
175
176 Ok((
177 FileMetadata {
178 path:file_path.clone(),
179 size:file_size,
180 modified:modified_time,
181 mime_type,
182 language,
183 line_count,
184 checksum,
185 is_symlink,
186 permissions,
187 encoding,
188 indexed_at:chrono::Utc::now(),
189 symbol_count:symbols.len() as u32,
190 },
191 symbols,
192 ))
193}
194
195pub async fn ValidateFileAccess(file_path:&PathBuf) -> bool {
197 tokio::task::spawn_blocking({
198 let file_path = file_path.to_path_buf();
199 move || {
200 let can_access = std::fs::metadata(&file_path).is_ok();
202 if can_access {
203 std::fs::File::open(&file_path).is_ok()
205 } else {
206 false
207 }
208 }
209 })
210 .await
211 .unwrap_or(false)
212}
213
214pub fn CalculateChecksum(content:&[u8]) -> String {
216 use sha2::{Digest, Sha256};
217 let mut hasher = Sha256::new();
218 hasher.update(content);
219 format!("{:x}", hasher.finalize())
220}
221
/// Renders the owner / group / other permission bits of `metadata` as a
/// nine-character string such as `rwxr-xr--`.
///
/// Each position holds `r`, `w` or `x` when the corresponding mode bit is
/// set and `-` when it is clear.
#[cfg(unix)]
pub fn GetPermissionsString(metadata:&std::fs::Metadata) -> String {
	use std::os::unix::fs::PermissionsExt;

	// Mode bit and its symbol, in display order: owner, group, other.
	const BIT_SYMBOLS:[(u32, char); 9] = [
		(0o400, 'r'),
		(0o200, 'w'),
		(0o100, 'x'),
		(0o040, 'r'),
		(0o020, 'w'),
		(0o010, 'x'),
		(0o004, 'r'),
		(0o002, 'w'),
		(0o001, 'x'),
	];

	let bits = metadata.permissions().mode();

	BIT_SYMBOLS
		.iter()
		.map(|&(mask, symbol)| if bits & mask != 0 { symbol } else { '-' })
		.collect()
}
244
/// Fallback for platforms without Unix mode bits: the metadata is ignored
/// and an all-dash placeholder is returned.
///
/// The placeholder has nine characters so that it has the same width as the
/// `rwxrwxrwx`-style string produced by the Unix implementation.
#[cfg(not(unix))]
pub fn GetPermissionsString(_metadata:&std::fs::Metadata) -> String { "---------".to_string() }
248
249pub async fn ScanFileMetadata(file_path:&PathBuf) -> Result<FileMetadata> {
251 let metadata = std::fs::metadata(file_path)
252 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
253
254 let modified = metadata
255 .modified()
256 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
257
258 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
259
260 Ok(FileMetadata {
261 path:file_path.clone(),
262 size:metadata.len(),
263 modified:modified_time,
264 mime_type:"application/octet-stream".to_string(),
265 language:None,
266 line_count:None,
267 checksum:String::new(),
268 is_symlink:metadata.file_type().is_symlink(),
269 permissions:GetPermissionsString(&metadata),
270 encoding:None,
271 indexed_at:chrono::Utc::now(),
272 symbol_count:0,
273 })
274}
275
276pub fn FileModifiedSince(file_path:&PathBuf, last_indexed:chrono::DateTime<chrono::Utc>) -> Result<bool> {
278 let metadata = std::fs::metadata(file_path)
279 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
280
281 let modified = metadata
282 .modified()
283 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
284
285 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
286
287 Ok(modified_time > last_indexed)
288}
289
290pub async fn GetFileSize(file_path:&PathBuf) -> Result<u64> {
292 tokio::task::spawn_blocking({
293 let file_path = file_path.to_path_buf();
294 move || {
295 let metadata = std::fs::metadata(&file_path)
296 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
297 Ok(metadata.len())
298 }
299 })
300 .await?
301}
302
303pub fn IsTextFile(metadata:&FileMetadata) -> bool {
305 metadata.mime_type.starts_with("text/")
306 || metadata.mime_type.contains("json")
307 || metadata.mime_type.contains("xml")
308 || metadata.mime_type.contains("yaml")
309 || metadata.mime_type.contains("toml")
310 || metadata.language.is_some()
311}
312
313pub fn IsBinaryFile(metadata:&FileMetadata) -> bool {
315 !IsTextFile(metadata)
316 || metadata.mime_type == "application/octet-stream"
317 || metadata.mime_type == "application/zip"
318 || metadata.mime_type == "application/x-tar"
319 || metadata.mime_type == "application/x-gzip"
320 || metadata.mime_type == "application/x-bzip2"
321}