AirLibrary/Indexing/State/
UpdateState.rs

1//! # UpdateState
2//!
3//! ## File: Indexing/State/UpdateState.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides state update operations for the File Indexer service, handling
8//! modification of index structures including adding, removing, and updating
9//! entries in the file index.
10//!
11//! ## Primary Responsibility
12//!
13//! Update file index state by adding/removing files, symbols, and content
14//! entries in a thread-safe manner.
15//!
16//! ## Secondary Responsibilities
17//!
18//! - Remove deleted files from all indexes
19//! - Update symbol index with new symbol locations
20//! - Update content index with new file paths
21//! - Maintain index version and checksum on updates
22//!
23//! ## Dependencies
24//!
25//! **External Crates:**
//! - `tokio` - Async runtime for update operations
//! - `chrono` - UTC timestamps for index metadata and age checks
27//!
28//! **Internal Modules:**
29//! - `crate::Result` - Error handling type
30//! - `crate::AirError` - Error types
31//! - `super::CreateState` - State structure definitions
32//!
33//! ## Dependents
34//!
35//! - `Indexing::Scan::ScanDirectory` - Updates index after directory scan
36//! - `Indexing::Scan::ScanFile` - Updates index after file scan
37//! - `Indexing::Store::UpdateIndex` - Incremental index updates
38//! - `Indexing::Watch::WatchFile` - Updates index on file changes
39//!
40//! ## VSCode Pattern Reference
41//!
42//! Inspired by VSCode's index update operations in
43//! `src/vs/workbench/services/search/common/`
44//!
45//! ## Security Considerations
46//!
47//! - Thread-safe updates prevent race conditions
48//! - Path validation before state updates
49//! - Size limits enforced on all update operations
50//!
51//! ## Performance Considerations
52//!
53//! - Incremental updates minimize reindexing
54//! - Batch updates for multiple files
55//! - Efficient hash lookups for O(1) updates
56//!
57//! ## Error Handling Strategy
58//!
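//! Removal operations are idempotent: removing a path that is not indexed is
//! a silent no-op. `UpdateFileMetadata` and `UpdateFileSymbols` return
//! `AirError::Internal` when the target file is not in the index, and errors
//! from checksum calculation are propagated to the caller.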
61//!
62//! ## Thread Safety
63//!
64//! All update operations are designed to work within RwLock write
65//! guards and should be called while holding appropriate locks.
66
67use std::path::PathBuf;
68
69use crate::{
70	AirError,
71	Indexing::State::CreateState::{FileIndex, FileMetadata, SymbolInfo, SymbolLocation},
72	Result,
73};
74
75/// Add a file to the index with its metadata and symbols
76pub fn AddFileToIndex(
77	index:&mut FileIndex,
78	file_path:PathBuf,
79	metadata:FileMetadata,
80	symbols:Vec<SymbolInfo>,
81) -> Result<()> {
82	// Check if file already exists and update accordingly
83	let is_new = !index.files.contains_key(&file_path);
84
85	// Add or update file metadata
86	index.files.insert(file_path.clone(), metadata.clone());
87
88	// Update symbol index
89	if is_new {
90		// Clear old symbols for this file if any
91		index.file_symbols.remove(&file_path);
92	}
93
94	// Add new symbols
95	index.file_symbols.insert(file_path.clone(), symbols.clone());
96
97	// Update symbol index for cross-referencing
98	for symbol in symbols {
99		index
100			.symbol_index
101			.entry(symbol.name.clone())
102			.or_insert_with(Vec::new)
103			.push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
104	}
105
106	Ok(())
107}
108
109/// Remove a file from all indexes (content, symbols, files)
110pub fn RemoveFileFromIndex(index:&mut FileIndex, file_path:&PathBuf) -> Result<()> {
111	// Remove from files index
112	index.files.remove(file_path);
113
114	// Remove from file_symbols
115	index.file_symbols.remove(file_path);
116
117	// Remove from symbol index
118	for (_, locations) in index.symbol_index.iter_mut() {
119		locations.retain(|loc| loc.file_path != *file_path);
120	}
121
122	// Remove from content index
123	for (_, files) in index.content_index.iter_mut() {
124		files.retain(|p| p != file_path);
125	}
126
127	Ok(())
128}
129
130/// Remove multiple files from the index in a batch operation
131pub fn RemoveFilesFromIndex(index:&mut FileIndex, file_paths:&[PathBuf]) -> Result<()> {
132	for file_path in file_paths {
133		RemoveFileFromIndex(index, file_path)?;
134	}
135	Ok(())
136}
137
138/// Update index metadata (version, timestamp, checksum)
139pub fn UpdateIndexMetadata(index:&mut FileIndex) -> Result<()> {
140	use crate::Indexing::State::CreateState::{CalculateIndexChecksum, GenerateIndexVersion};
141
142	index.last_updated = chrono::Utc::now();
143	index.index_version = GenerateIndexVersion();
144	index.index_checksum = CalculateIndexChecksum(index)?;
145
146	Ok(())
147}
148
149/// Update file metadata for an existing file
150pub fn UpdateFileMetadata(index:&mut FileIndex, file_path:&PathBuf, metadata:FileMetadata) -> Result<()> {
151	if !index.files.contains_key(file_path) {
152		return Err(AirError::Internal(format!(
153			"Cannot update metadata for file not in index: {}",
154			file_path.display()
155		)));
156	}
157
158	index.files.insert(file_path.clone(), metadata);
159	Ok(())
160}
161
162/// Update symbols for a file
163pub fn UpdateFileSymbols(index:&mut FileIndex, file_path:&PathBuf, symbols:Vec<SymbolInfo>) -> Result<()> {
164	if !index.files.contains_key(file_path) {
165		return Err(AirError::Internal(format!(
166			"Cannot update symbols for file not in index: {}",
167			file_path.display()
168		)));
169	}
170
171	// Remove old symbols from symbol index
172	if let Some(old_symbols) = index.file_symbols.get(file_path) {
173		for old_symbol in old_symbols {
174			if let Some(locations) = index.symbol_index.get_mut(&old_symbol.name) {
175				locations.retain(|loc| loc.file_path != *file_path);
176			}
177		}
178	}
179
180	// Add new symbols
181	index.file_symbols.insert(file_path.clone(), symbols.clone());
182
183	for symbol in symbols {
184		index
185			.symbol_index
186			.entry(symbol.name.clone())
187			.or_insert_with(Vec::new)
188			.push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
189	}
190
191	Ok(())
192}
193
194/// Update content index for a file
195pub fn UpdateContentIndex(index:&mut FileIndex, file_path:&PathBuf, tokens:Vec<String>) -> Result<()> {
196	// Remove file from existing content index entries
197	for (_, files) in index.content_index.iter_mut() {
198		files.retain(|p| p != file_path);
199	}
200
201	// Add new tokens
202	for token in tokens {
203		if token.len() > 2 {
204			// Only index tokens longer than 2 characters
205			index
206				.content_index
207				.entry(token)
208				.or_insert_with(Vec::new)
209				.push(file_path.clone());
210		}
211	}
212
213	Ok(())
214}
215
216/// Clean up orphaned entries (files with no matching content/symbols)
217pub fn CleanupOrphanedEntries(index:&mut FileIndex) -> Result<u32> {
218	let mut removed_count = 0;
219
220	// Clean up content index entries with no files
221	let orphaned_tokens:Vec<_> = index
222		.content_index
223		.iter()
224		.filter(|(_, files)| files.is_empty())
225		.map(|(token, _)| token.clone())
226		.collect();
227
228	for token in orphaned_tokens {
229		index.content_index.remove(&token);
230		removed_count += 1;
231	}
232
233	// Clean up symbol index entries with no locations
234	let orphaned_symbols:Vec<_> = index
235		.symbol_index
236		.iter()
237		.filter(|(_, locations)| locations.is_empty())
238		.map(|(symbol, _)| symbol.clone())
239		.collect();
240
241	for symbol in orphaned_symbols {
242		index.symbol_index.remove(&symbol);
243		removed_count += 1;
244	}
245
246	Ok(removed_count)
247}
248
249/// Merge another index into this one
250pub fn MergeIndexes(target:&mut FileIndex, source:FileIndex) -> Result<u32> {
251	let mut merged_files = 0;
252
253	// Merge files
254	for (path, metadata) in source.files {
255		if !target.files.contains_key(&path) {
256			target.files.insert(path.clone(), metadata);
257			merged_files += 1;
258		}
259	}
260
261	// Merge content index
262	for (token, mut files) in source.content_index {
263		target.content_index.entry(token).or_insert_with(Vec::new).append(&mut files);
264	}
265
266	// Merge symbol index
267	for (symbol, mut locations) in source.symbol_index {
268		target
269			.symbol_index
270			.entry(symbol)
271			.or_insert_with(Vec::new)
272			.append(&mut locations);
273	}
274
275	// Merge file symbols
276	for (path, symbols) in source.file_symbols {
277		if !target.file_symbols.contains_key(&path) {
278			target.file_symbols.insert(path, symbols);
279		}
280	}
281
282	// Update metadata
283	UpdateIndexMetadata(target)?;
284
285	Ok(merged_files)
286}
287
288/// Validate that index is in a consistent state
289pub fn ValidateIndexConsistency(index:&FileIndex) -> Result<()> {
290	// Check that all files in content_index exist in files
291	for (_, files) in &index.content_index {
292		for file_path in files {
293			if !index.files.contains_key(file_path) {
294				return Err(AirError::Internal(format!(
295					"Content index references non-existent file: {}",
296					file_path.display()
297				)));
298			}
299		}
300	}
301
302	// Check that all files in symbol_index exist in files
303	for (_, locations) in &index.symbol_index {
304		for location in locations {
305			if !index.files.contains_key(&location.file_path) {
306				return Err(AirError::Internal(format!(
307					"Symbol index references non-existent file: {}",
308					location.file_path.display()
309				)));
310			}
311		}
312	}
313
314	// Check that all files in file_symbols exist in files
315	for (file_path, _) in &index.file_symbols {
316		if !index.files.contains_key(file_path) {
317			return Err(AirError::Internal(format!(
318				"File symbols references non-existent file: {}",
319				file_path.display()
320			)));
321		}
322	}
323
324	Ok(())
325}
326
327/// Get index size estimate in bytes
328pub fn GetIndexSizeEstimate(index:&FileIndex) -> usize {
329	let mut size = 0;
330
331	// File metadata
332	for (path, _metadata) in &index.files {
333		size += path.as_os_str().len();
334		size += std::mem::size_of::<FileMetadata>();
335	}
336
337	// Content index
338	for (token, files) in &index.content_index {
339		size += token.len();
340		size += files.len() * std::mem::size_of::<PathBuf>();
341	}
342
343	// Symbol index
344	for (symbol, locations) in &index.symbol_index {
345		size += symbol.len();
346		size += locations.len() * std::mem::size_of::<SymbolLocation>();
347	}
348
349	// File symbols
350	for (path, symbols) in &index.file_symbols {
351		size += path.as_os_str().len();
352		size += symbols.len() * std::mem::size_of::<SymbolInfo>();
353	}
354
355	size
356}
357
358/// Check if periodic update is needed based on age
359pub fn NeedsUpdate(index:&FileIndex, max_age_minutes:u64) -> bool {
360	let age_minutes = (chrono::Utc::now() - index.last_updated).num_minutes().abs() as u64;
361	age_minutes >= max_age_minutes
362}