1pub mod State;
67pub mod Scan;
68pub mod Process;
69pub mod Language;
70pub mod Store;
71pub mod Watch;
72pub mod Background;
73
74use std::{collections::HashMap, path::PathBuf, sync::Arc};
76
77use tokio::sync::{Mutex, RwLock};
78
79use crate::{
80 AirError,
81 ApplicationState::ApplicationState,
82 Configuration::ConfigurationManager,
83 Indexing::{
84 Scan::{
85 ScanDirectory::{ScanAndRemoveDeleted, ScanDirectoriesParallel},
86 ScanFile::IndexFileInternal,
87 },
88 State::UpdateState::{UpdateIndexMetadata, ValidateIndexConsistency},
89 Store::{
90 QueryIndex::{PaginatedSearchResults, QueryIndexSearch, SearchQuery},
91 StoreEntry::{BackupCorruptedIndex, EnsureIndexDirectory, LoadOrCreateIndex, SaveIndex},
92 UpdateIndex::UpdateFileContent,
93 },
94 },
95 Result,
96};
97use crate::Indexing::State::CreateState::{CreateNewIndex, FileIndex, FileMetadata, SymbolInfo, SymbolLocation};
99
/// Upper bound on concurrent indexing work.
///
/// Used as the permit count of `FileIndexer::indexing_semaphore` and passed
/// to `ScanDirectoriesParallel` as its parallelism argument.
const MAX_PARALLEL_INDEXING:usize = 10;
102
/// Summary returned by `FileIndexer::IndexDirectory` for a single run.
#[derive(Debug, Clone)]
pub struct IndexResult {
	/// Number of files whose indexing task succeeded and was merged into the
	/// index.
	pub files_indexed:u32,

	/// Sum of `FileMetadata::size` over the successfully indexed files.
	pub total_size:u64,

	/// Wall-clock duration of the run in seconds, measured from the start of
	/// `IndexDirectory` until after the index was saved.
	pub duration_seconds:f64,

	/// Total number of symbols returned for the successfully indexed files.
	pub symbols_extracted:u32,

	/// Number of files whose indexing returned an error or whose task failed
	/// to complete.
	pub files_with_errors:u32,
}
117
/// Aggregate view of the in-memory index, produced by
/// `FileIndexer::GetIndexStatistics`. Serializable via serde.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexStatistics {
	/// Number of entries in the index's `files` map.
	pub file_count:u32,
	/// Sum of `size` over all indexed file metadata.
	pub total_size:u64,
	/// Sum of `symbol_count` over all indexed file metadata.
	pub total_symbols:u32,
	/// Number of indexed files per language; files whose metadata has no
	/// language are not counted.
	pub language_counts:HashMap<String, u32>,
	/// Copied from the index's `last_updated` field.
	pub last_updated:chrono::DateTime<chrono::Utc>,
	/// Copied from the index's `index_version` field.
	pub index_version:String,
}
128
/// Owns the in-memory file index and the handles needed to build, query and
/// persist it. Every field except `index_directory` is behind an `Arc`, so
/// clones of a `FileIndexer` share the same state.
pub struct FileIndexer {
	/// Shared application state; read for the indexing configuration and used
	/// to report the "indexing" service status.
	AppState:Arc<ApplicationState>,

	/// The in-memory index, guarded by an async read/write lock.
	file_index:Arc<RwLock<FileIndex>>,

	/// Directory the index is loaded from and saved to.
	index_directory:PathBuf,

	/// Optional filesystem watcher. `new` initialises it to `None`; nothing
	/// in this section installs a watcher.
	file_watcher:Arc<Mutex<Option<notify::RecommendedWatcher>>>,

	/// Limits the number of concurrently running indexing tasks to
	/// `MAX_PARALLEL_INDEXING`.
	indexing_semaphore:Arc<tokio::sync::Semaphore>,

	/// Corruption flag. Starts as `false` and is reset to `false` by
	/// `recover_from_corruption`; where it gets set to `true` is not visible
	/// in this section.
	corruption_detected:Arc<Mutex<bool>>,
}
157
158impl FileIndexer {
159 pub async fn new(AppState:Arc<ApplicationState>) -> Result<Self> {
167 let config = &AppState.Configuration.Indexing;
168
169 let index_directory = Self::ValidateAndExpandPath(&config.IndexDirectory)?;
171
172 EnsureIndexDirectory(&index_directory).await?;
174
175 let file_index = LoadOrCreateIndex(&index_directory).await?;
177
178 let indexer = Self {
179 AppState:AppState.clone(),
180 file_index:Arc::new(RwLock::new(file_index)),
181 index_directory:index_directory.clone(),
182 file_watcher:Arc::new(Mutex::new(None)),
183 indexing_semaphore:Arc::new(tokio::sync::Semaphore::new(MAX_PARALLEL_INDEXING)),
184 corruption_detected:Arc::new(Mutex::new(false)),
185 };
186
187 indexer.VerifyIndexIntegrity().await?;
189
190 indexer
192 .AppState
193 .UpdateServiceStatus("indexing", crate::ApplicationState::ServiceStatus::Running)
194 .await
195 .map_err(|e| AirError::Internal(e.to_string()))?;
196
197 log::info!("[FileIndexer] Initialized with index directory: {}", index_directory.display());
198
199 Ok(indexer)
200 }
201
202 fn ValidateAndExpandPath(path:&str) -> Result<PathBuf> {
204 let expanded = ConfigurationManager::ExpandPath(path)?;
205
206 let path_str = expanded.to_string_lossy();
208 if path_str.contains("..") {
209 return Err(AirError::FileSystem("Path contains invalid traversal sequence".to_string()));
210 }
211
212 Ok(expanded)
213 }
214
215 async fn VerifyIndexIntegrity(&self) -> Result<()> {
217 let index = self.file_index.read().await;
218
219 ValidateIndexConsistency(&index)?;
221
222 let mut missing_files = 0;
224 for file_path in index.files.keys() {
225 if !file_path.exists() {
226 missing_files += 1;
227 }
228 }
229
230 if missing_files > 0 {
231 log::warn!("[FileIndexer] Found {} missing files in index", missing_files);
232 }
233
234 log::info!("[FileIndexer] Index integrity verified successfully");
235
236 Ok(())
237 }
238
239 pub async fn IndexDirectory(&self, path:String, patterns:Vec<String>) -> Result<IndexResult> {
241 let start_time = std::time::Instant::now();
242
243 log::info!("[FileIndexer] Starting directory index: {}", path);
244
245 let config = &self.AppState.Configuration.Indexing;
246
247 let (files_to_index, _scan_result) =
249 ScanDirectoriesParallel(vec![path.clone()], patterns.clone(), config, MAX_PARALLEL_INDEXING).await?;
250
251 let _index_arc = self.file_index.clone();
254 let semaphore = self.indexing_semaphore.clone();
255 let config_clone = config.clone();
256 let mut index_tasks = Vec::new();
257
258 for file_path in files_to_index {
259 let permit = semaphore.clone().acquire_owned().await.unwrap();
260 let config_for_task = config_clone.clone();
261
262 let task = tokio::spawn(async move {
263 let _permit = permit;
264 IndexFileInternal(&file_path, &config_for_task, &[]).await
265 });
266
267 index_tasks.push(task);
268 }
269
270 let mut index = self.file_index.write().await;
272 let mut indexed_paths = std::collections::HashSet::new();
273 let mut files_indexed = 0u32;
274 let mut total_size = 0u64;
275 let mut symbols_extracted = 0u32;
276 let mut files_with_errors = 0u32;
277
278 for task in index_tasks {
279 match task.await {
280 Ok(Ok((metadata, symbols))) => {
281 let file_path = metadata.path.clone();
282
283 index.files.insert(file_path.clone(), metadata.clone());
284 indexed_paths.insert(file_path.clone());
285
286 if let Err(e) = UpdateFileContent(&mut index, &file_path, &metadata).await {
288 log::warn!("[FileIndexer] Failed to index content for {}: {}", file_path.display(), e);
289 }
290
291 index.file_symbols.insert(file_path.clone(), symbols.clone());
293 symbols_extracted += symbols.len() as u32;
294
295 for symbol in symbols {
297 index
298 .symbol_index
299 .entry(symbol.name.clone())
300 .or_insert_with(Vec::new)
301 .push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
302 }
303
304 files_indexed += 1;
305 total_size += metadata.size;
306 },
307 Ok(Err(_)) => {
308 files_with_errors += 1;
309 },
310 Err(e) => {
311 log::error!("[FileIndexer] Indexing task failed: {}", e);
312 files_with_errors += 1;
313 },
314 }
315 }
316
317 ScanAndRemoveDeleted(&mut index, &Self::ValidateAndExpandPath(&path)?).await?;
319
320 UpdateIndexMetadata(&mut index)?;
322
323 SaveIndex(&self.index_directory, &index).await?;
325
326 let duration = start_time.elapsed().as_secs_f64();
327
328 log::info!(
329 "[FileIndexer] Indexing completed: {} files, {} bytes, {} symbols, {} errors in {:.2}s",
330 files_indexed,
331 total_size,
332 symbols_extracted,
333 files_with_errors,
334 duration
335 );
336
337 Ok(IndexResult {
338 files_indexed,
339 total_size,
340 duration_seconds:duration,
341 symbols_extracted,
342 files_with_errors,
343 })
344 }
345
346 pub async fn SearchFiles(
348 &self,
349 query:SearchQuery,
350 path:Option<String>,
351 language:Option<String>,
352 ) -> Result<PaginatedSearchResults> {
353 let index = self.file_index.read().await;
354 QueryIndexSearch(&index, query, path, language).await
355 }
356
357 pub async fn SearchSymbols(&self, query:&str, max_results:u32) -> Result<Vec<SymbolInfo>> {
359 let index = self.file_index.read().await;
360 let query_lower = query.to_lowercase();
361 let mut results = Vec::new();
362
363 for (symbol_name, locations) in &index.symbol_index {
364 if symbol_name.to_lowercase().contains(&query_lower) {
365 for loc in locations.iter().take(max_results as usize) {
366 results.push(loc.symbol.clone());
367 if results.len() >= max_results as usize {
368 break;
369 }
370 }
371 }
372 }
373
374 Ok(results)
375 }
376
377 pub async fn GetFileSymbols(&self, file_path:&PathBuf) -> Result<Vec<SymbolInfo>> {
379 let index = self.file_index.read().await;
380 Ok(index.file_symbols.get(file_path).cloned().unwrap_or_default())
381 }
382
383 pub async fn GetFileInfo(&self, path:String) -> Result<Option<FileMetadata>> {
385 let file_path = Self::ValidateAndExpandPath(&path)?;
386 let index = self.file_index.read().await;
387
388 Ok(index.files.get(&file_path).cloned())
389 }
390
391 pub async fn GetIndexStatistics(&self) -> Result<IndexStatistics> {
393 let index = self.file_index.read().await;
394
395 let mut language_counts:HashMap<String, u32> = HashMap::new();
396 let total_size = index.files.values().map(|m| m.size).sum();
397 let total_symbols = index.files.values().map(|m| m.symbol_count).sum();
398
399 for metadata in index.files.values() {
400 if let Some(lang) = &metadata.language {
401 *language_counts.entry(lang.clone()).or_insert(0) += 1;
402 }
403 }
404
405 Ok(IndexStatistics {
406 file_count:index.files.len() as u32,
407 total_size,
408 total_symbols,
409 language_counts,
410 last_updated:index.last_updated,
411 index_version:index.index_version.clone(),
412 })
413 }
414
415 pub async fn recover_from_corruption(&self) -> Result<()> {
417 log::info!("[FileIndexer] Recovering from corrupted index...");
418
419 BackupCorruptedIndex(&self.index_directory).await?;
421
422 let new_index = CreateNewIndex();
424 *self.file_index.write().await = new_index;
425
426 *self.corruption_detected.lock().await = false;
428
429 log::info!("[FileIndexer] Index recovery completed");
430
431 Ok(())
432 }
433}
434
435impl Clone for FileIndexer {
436 fn clone(&self) -> Self {
437 Self {
438 AppState:self.AppState.clone(),
439 file_index:self.file_index.clone(),
440 index_directory:self.index_directory.clone(),
441 file_watcher:self.file_watcher.clone(),
442 indexing_semaphore:self.indexing_semaphore.clone(),
443 corruption_detected:self.corruption_detected.clone(),
444 }
445 }
446}