AirLibrary/Indexing/
mod.rs

1//! # File Indexing and Search Service
2//!
3//! ## File: Indexing/mod.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides comprehensive file indexing, search, and content analysis
8//! capabilities for the Land ecosystem, inspired by and compatible with
9//! Visual Studio Code's search service.
10//!
11//! ## Primary Responsibility
12//!
13//! Facade module for the Indexing service, exposing the public API for
14//! file indexing, search, and symbol extraction operations.
15//!
16//! ## Secondary Responsibilities
17//!
//! - Re-export public types from submodules
19//! - Provide unified FileIndexer API
20//! - Coordinate between indexing subsystems
21//!
22//! ## Dependencies
23//!
24//! **External Crates:**
25//! - `regex` - Regular expression search patterns
26//! - `serde` - Serialization for index storage
27//! - `tokio` - Async runtime for all operations
28//! - `notify` - File system watching
29//! - `chrono` - Timestamp management
30//!
31//! **Internal Modules:**
32//! - `crate::Result` - Error handling type
33//! - `crate::AirError` - Error types
34//! - `crate::ApplicationState::ApplicationState` - Application state
35//! - `crate::Configuration::ConfigurationManager` - Configuration management
36//!
37//! ## Dependents
38//!
39//! - `Indexing::FileIndexer` - Main indexer implementation
40//! - `Vine::Server::AirVinegRPCService` - gRPC integration
41//!
42//! ## VSCode Integration
43//!
44//! This service integrates with VSCode's search and file service architecture:
45//!
46//! - References: vs/workbench/services/search
47//! - File Service: vs/workbench/services/files
48//!
49//! The indexing system supports VSCode features:
50//! - **Outline View**: Symbol extraction for class/function navigation
51//! - **Go to Symbol**: Cross-file symbol search and lookup
52//! - **Search Integration**: File content and name search with regex support
53//! - **Workspace Search**: Multi-workspace index sharing
54//!
55//! ## TODO
56//!
57//! - [ ] Implement full ripgrep integration for ultra-fast text search
58//! - [ ] Add project-level search with workspace awareness
59//! - [ ] Implement search query caching
//! - [ ] Add fuzzy search with typo tolerance
61//! - [ ] Implement search history and recent queries
62//! - [ ] Add search result preview with context
63//! - [ ] Implement parallel indexing for large directories
64
65// Modules - file-based (no inline definitions)
66pub mod State;
67pub mod Scan;
68pub mod Process;
69pub mod Language;
70pub mod Store;
71pub mod Watch;
72pub mod Background;
73
74// Import types and functions needed for the FileIndexer implementation
75use std::{collections::HashMap, path::PathBuf, sync::Arc};
76
77use tokio::sync::{Mutex, RwLock};
78
79use crate::{
80	AirError,
81	ApplicationState::ApplicationState,
82	Configuration::ConfigurationManager,
83	Indexing::{
84		Scan::{
85			ScanDirectory::{ScanAndRemoveDeleted, ScanDirectoriesParallel},
86			ScanFile::IndexFileInternal,
87		},
88		State::UpdateState::{UpdateIndexMetadata, ValidateIndexConsistency},
89		Store::{
90			QueryIndex::{PaginatedSearchResults, QueryIndexSearch, SearchQuery},
91			StoreEntry::{BackupCorruptedIndex, EnsureIndexDirectory, LoadOrCreateIndex, SaveIndex},
92			UpdateIndex::UpdateFileContent,
93		},
94	},
95	Result,
96};
97// Import types from submodules with explicit full paths
98use crate::Indexing::State::CreateState::{CreateNewIndex, FileIndex, FileMetadata, SymbolInfo, SymbolLocation};
99
/// Maximum number of parallel indexing operations.
///
/// Used as the permit count of the indexing semaphore created in
/// `FileIndexer::new`, and also passed to `ScanDirectoriesParallel` when a
/// directory is scanned.
const MAX_PARALLEL_INDEXING:usize = 10;
102
/// Summary statistics returned by a single `FileIndexer::IndexDirectory` run.
#[derive(Debug, Clone)]
pub struct IndexResult {
	/// Number of files whose indexing task completed successfully.
	pub files_indexed:u32,
	/// Sum of the `size` of every successfully indexed file, in bytes.
	pub total_size:u64,
	/// Wall-clock duration of the whole run, in seconds.
	pub duration_seconds:f64,
	/// Total number of symbols returned for the successfully indexed files.
	pub symbols_extracted:u32,
	/// Number of files whose indexing task returned an error or could not be
	/// joined.
	pub files_with_errors:u32,
}
117
/// Aggregate statistics describing the current contents of the file index.
///
/// Produced by `FileIndexer::GetIndexStatistics`; serializable with `serde`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexStatistics {
	/// Number of files currently present in the index.
	pub file_count:u32,
	/// Sum of the recorded sizes of all indexed files.
	pub total_size:u64,
	/// Sum of the per-file symbol counts stored in the file metadata.
	pub total_symbols:u32,
	/// Number of indexed files per language; files without a recorded language
	/// are not counted.
	pub language_counts:HashMap<String, u32>,
	/// Timestamp copied from the index's `last_updated` field.
	pub last_updated:chrono::DateTime<chrono::Utc>,
	/// Version string copied from the index's `index_version` field.
	pub index_version:String,
}
128
/// File indexer implementation with comprehensive search capabilities
///
/// This indexer provides:
/// - Incremental file watching with real-time updates
/// - Multi-mode search (literal, regex, fuzzy)
/// - Symbol extraction for VSCode Outline View
/// - Language detection for syntax highlighting
/// - Index corruption detection and recovery
/// - Parallel indexing with resource limits
///
/// Every shared field is wrapped in an `Arc`, so clones of a `FileIndexer`
/// operate on the same underlying index, watcher slot, semaphore and
/// corruption flag.
pub struct FileIndexer {
	/// Shared application state; provides the indexing configuration and the
	/// service-status registry.
	AppState:Arc<ApplicationState>,

	/// File index with metadata and symbols, guarded by an async read/write
	/// lock.
	file_index:Arc<RwLock<FileIndex>>,

	/// Directory the index is loaded from and saved to.
	index_directory:PathBuf,

	/// File watcher for incremental updates. Initialised to `None` by `new`.
	file_watcher:Arc<Mutex<Option<notify::RecommendedWatcher>>>,

	/// Semaphore for limiting parallel indexing operations
	/// (`MAX_PARALLEL_INDEXING` permits).
	indexing_semaphore:Arc<tokio::sync::Semaphore>,

	/// Index corruption detection state. Initialised to `false` and reset by
	/// `recover_from_corruption`.
	// NOTE(review): nothing in this file sets the flag to `true`; presumably a
	// submodule does — confirm.
	corruption_detected:Arc<Mutex<bool>>,
}
157
158impl FileIndexer {
159	/// Create a new file indexer with comprehensive setup
160	///
161	/// Initializes the indexer with:
162	/// - Index directory creation
163	/// - Existing index loading or fresh creation
164	/// - Index corruption detection
165	/// - Service status initialization
166	pub async fn new(AppState:Arc<ApplicationState>) -> Result<Self> {
167		let config = &AppState.Configuration.Indexing;
168
169		// Expand index directory path with validation
170		let index_directory = Self::ValidateAndExpandPath(&config.IndexDirectory)?;
171
172		// Create index directory if it doesn't exist with error handling
173		EnsureIndexDirectory(&index_directory).await?;
174
175		// Load or create index with corruption detection
176		let file_index = LoadOrCreateIndex(&index_directory).await?;
177
178		let indexer = Self {
179			AppState:AppState.clone(),
180			file_index:Arc::new(RwLock::new(file_index)),
181			index_directory:index_directory.clone(),
182			file_watcher:Arc::new(Mutex::new(None)),
183			indexing_semaphore:Arc::new(tokio::sync::Semaphore::new(MAX_PARALLEL_INDEXING)),
184			corruption_detected:Arc::new(Mutex::new(false)),
185		};
186
187		// Verify index integrity
188		indexer.VerifyIndexIntegrity().await?;
189
190		// Initialize service status
191		indexer
192			.AppState
193			.UpdateServiceStatus("indexing", crate::ApplicationState::ServiceStatus::Running)
194			.await
195			.map_err(|e| AirError::Internal(e.to_string()))?;
196
197		log::info!("[FileIndexer] Initialized with index directory: {}", index_directory.display());
198
199		Ok(indexer)
200	}
201
202	/// Validate and expand path with traversal protection
203	fn ValidateAndExpandPath(path:&str) -> Result<PathBuf> {
204		let expanded = ConfigurationManager::ExpandPath(path)?;
205
206		// Prevent path traversal attacks
207		let path_str = expanded.to_string_lossy();
208		if path_str.contains("..") {
209			return Err(AirError::FileSystem("Path contains invalid traversal sequence".to_string()));
210		}
211
212		Ok(expanded)
213	}
214
215	/// Verify index integrity and detect corruption
216	async fn VerifyIndexIntegrity(&self) -> Result<()> {
217		let index = self.file_index.read().await;
218
219		// Check consistency
220		ValidateIndexConsistency(&index)?;
221
222		// Verify all indexed files exist
223		let mut missing_files = 0;
224		for file_path in index.files.keys() {
225			if !file_path.exists() {
226				missing_files += 1;
227			}
228		}
229
230		if missing_files > 0 {
231			log::warn!("[FileIndexer] Found {} missing files in index", missing_files);
232		}
233
234		log::info!("[FileIndexer] Index integrity verified successfully");
235
236		Ok(())
237	}
238
239	/// Index a directory with comprehensive validation and parallel processing
240	pub async fn IndexDirectory(&self, path:String, patterns:Vec<String>) -> Result<IndexResult> {
241		let start_time = std::time::Instant::now();
242
243		log::info!("[FileIndexer] Starting directory index: {}", path);
244
245		let config = &self.AppState.Configuration.Indexing;
246
247		// Scan directory
248		let (files_to_index, _scan_result) =
249			ScanDirectoriesParallel(vec![path.clone()], patterns.clone(), config, MAX_PARALLEL_INDEXING).await?;
250
251		// Index files in parallel
252		// Variables cloned for use in async task
253		let _index_arc = self.file_index.clone();
254		let semaphore = self.indexing_semaphore.clone();
255		let config_clone = config.clone();
256		let mut index_tasks = Vec::new();
257
258		for file_path in files_to_index {
259			let permit = semaphore.clone().acquire_owned().await.unwrap();
260			let config_for_task = config_clone.clone();
261
262			let task = tokio::spawn(async move {
263				let _permit = permit;
264				IndexFileInternal(&file_path, &config_for_task, &[]).await
265			});
266
267			index_tasks.push(task);
268		}
269
270		// Collect results
271		let mut index = self.file_index.write().await;
272		let mut indexed_paths = std::collections::HashSet::new();
273		let mut files_indexed = 0u32;
274		let mut total_size = 0u64;
275		let mut symbols_extracted = 0u32;
276		let mut files_with_errors = 0u32;
277
278		for task in index_tasks {
279			match task.await {
280				Ok(Ok((metadata, symbols))) => {
281					let file_path = metadata.path.clone();
282
283					index.files.insert(file_path.clone(), metadata.clone());
284					indexed_paths.insert(file_path.clone());
285
286					// Index content for search
287					if let Err(e) = UpdateFileContent(&mut index, &file_path, &metadata).await {
288						log::warn!("[FileIndexer] Failed to index content for {}: {}", file_path.display(), e);
289					}
290
291					// Index symbols
292					index.file_symbols.insert(file_path.clone(), symbols.clone());
293					symbols_extracted += symbols.len() as u32;
294
295					// Update symbol index
296					for symbol in symbols {
297						index
298							.symbol_index
299							.entry(symbol.name.clone())
300							.or_insert_with(Vec::new)
301							.push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
302					}
303
304					files_indexed += 1;
305					total_size += metadata.size;
306				},
307				Ok(Err(_)) => {
308					files_with_errors += 1;
309				},
310				Err(e) => {
311					log::error!("[FileIndexer] Indexing task failed: {}", e);
312					files_with_errors += 1;
313				},
314			}
315		}
316
317		// Remove files that were indexed before but no longer exist
318		ScanAndRemoveDeleted(&mut index, &Self::ValidateAndExpandPath(&path)?).await?;
319
320		// Update index metadata
321		UpdateIndexMetadata(&mut index)?;
322
323		// Save index to disk
324		SaveIndex(&self.index_directory, &index).await?;
325
326		let duration = start_time.elapsed().as_secs_f64();
327
328		log::info!(
329			"[FileIndexer] Indexing completed: {} files, {} bytes, {} symbols, {} errors in {:.2}s",
330			files_indexed,
331			total_size,
332			symbols_extracted,
333			files_with_errors,
334			duration
335		);
336
337		Ok(IndexResult {
338			files_indexed,
339			total_size,
340			duration_seconds:duration,
341			symbols_extracted,
342			files_with_errors,
343		})
344	}
345
346	/// Search files with multiple modes
347	pub async fn SearchFiles(
348		&self,
349		query:SearchQuery,
350		path:Option<String>,
351		language:Option<String>,
352	) -> Result<PaginatedSearchResults> {
353		let index = self.file_index.read().await;
354		QueryIndexSearch(&index, query, path, language).await
355	}
356
357	/// Search symbols across all files (for VSCode Go to Symbol)
358	pub async fn SearchSymbols(&self, query:&str, max_results:u32) -> Result<Vec<SymbolInfo>> {
359		let index = self.file_index.read().await;
360		let query_lower = query.to_lowercase();
361		let mut results = Vec::new();
362
363		for (symbol_name, locations) in &index.symbol_index {
364			if symbol_name.to_lowercase().contains(&query_lower) {
365				for loc in locations.iter().take(max_results as usize) {
366					results.push(loc.symbol.clone());
367					if results.len() >= max_results as usize {
368						break;
369					}
370				}
371			}
372		}
373
374		Ok(results)
375	}
376
377	/// Get symbols for a specific file (for VSCode Outline View)
378	pub async fn GetFileSymbols(&self, file_path:&PathBuf) -> Result<Vec<SymbolInfo>> {
379		let index = self.file_index.read().await;
380		Ok(index.file_symbols.get(file_path).cloned().unwrap_or_default())
381	}
382
383	/// Get file information
384	pub async fn GetFileInfo(&self, path:String) -> Result<Option<FileMetadata>> {
385		let file_path = Self::ValidateAndExpandPath(&path)?;
386		let index = self.file_index.read().await;
387
388		Ok(index.files.get(&file_path).cloned())
389	}
390
391	/// Get index statistics
392	pub async fn GetIndexStatistics(&self) -> Result<IndexStatistics> {
393		let index = self.file_index.read().await;
394
395		let mut language_counts:HashMap<String, u32> = HashMap::new();
396		let total_size = index.files.values().map(|m| m.size).sum();
397		let total_symbols = index.files.values().map(|m| m.symbol_count).sum();
398
399		for metadata in index.files.values() {
400			if let Some(lang) = &metadata.language {
401				*language_counts.entry(lang.clone()).or_insert(0) += 1;
402			}
403		}
404
405		Ok(IndexStatistics {
406			file_count:index.files.len() as u32,
407			total_size,
408			total_symbols,
409			language_counts,
410			last_updated:index.last_updated,
411			index_version:index.index_version.clone(),
412		})
413	}
414
415	/// Recover corrupted index
416	pub async fn recover_from_corruption(&self) -> Result<()> {
417		log::info!("[FileIndexer] Recovering from corrupted index...");
418
419		// Backup corrupted index
420		BackupCorruptedIndex(&self.index_directory).await?;
421
422		// Create new index
423		let new_index = CreateNewIndex();
424		*self.file_index.write().await = new_index;
425
426		// Clear corruption flag
427		*self.corruption_detected.lock().await = false;
428
429		log::info!("[FileIndexer] Index recovery completed");
430
431		Ok(())
432	}
433}
434
435impl Clone for FileIndexer {
436	fn clone(&self) -> Self {
437		Self {
438			AppState:self.AppState.clone(),
439			file_index:self.file_index.clone(),
440			index_directory:self.index_directory.clone(),
441			file_watcher:self.file_watcher.clone(),
442			indexing_semaphore:self.indexing_semaphore.clone(),
443			corruption_detected:self.corruption_detected.clone(),
444		}
445	}
446}