AirLibrary/Indexing/Store/
StoreEntry.rs

1//! # StoreEntry
2//!
3//! ## File: Indexing/Store/StoreEntry.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides index storage functionality for the File Indexer service,
8//! handling serialization and persistence of the file index to disk.
9//!
10//! ## Primary Responsibility
11//!
12//! Store the file index to disk with atomic writes and corruption recovery
13//! mechanisms.
14//!
15//! ## Secondary Responsibilities
16//!
17//! - Load index from disk with validation
18//! - Backup corrupted indexes automatically
19//! - Atomic writes using temp files
20//! - Index integrity verification
21//!
22//! ## Dependencies
23//!
24//! **External Crates:**
25//! - `serde_json` - JSON serialization/deserialization
26//! - `tokio` - Async file I/O operations
27//!
28//! **Internal Modules:**
29//! - `crate::Result` - Error handling type
30//! - `crate::AirError` - Error types
31//! - `super::super::FileIndex` - Index structure definitions
32//! - `super::super::State::CreateState` - State creation utilities
33//!
34//! ## Dependents
35//!
36//! - `Indexing::mod::FileIndexer` - Main file indexer implementation
37//!
38//! ## VSCode Pattern Reference
39//!
40//! Inspired by VSCode's index storage in
41//! `src/vs/workbench/services/search/common/`
42//!
43//! ## Security Considerations
44//!
45//! - Atomic writes prevent partial index corruption
46//! - Permission checking on index directory
47//! - Path traversal protection
48//!
49//! ## Performance Considerations
50//!
51//! - Temp file pattern for atomic writes
52//! - Lazy loading of in-memory index
53//! - Efficient serialization with serde
54//!
55//! ## Error Handling Strategy
56//!
57//! Storage operations return detailed error messages for failures and
58//! automatically backup corrupted indexes when loading fails.
59//!
60//! ## Thread Safety
61//!
62//! Storage operations use async file I/O and return results that can be
63//! safely merged into shared `Arc<RwLock<>>` state.
64
65use std::path::{Path, PathBuf};
66
67use crate::{
68	AirError,
69	Indexing::State::CreateState::FileIndex,
70	Result,
71};
72
73/// Save index to disk with atomic write
74pub async fn SaveIndex(index_directory:&Path, index:&FileIndex) -> Result<()> {
75	let index_file = index_directory.join("file_index.json");
76	let temp_file = index_directory.join("file_index.json.tmp");
77
78	let content = serde_json::to_string_pretty(index)
79		.map_err(|e| AirError::Serialization(format!("Failed to serialize index: {}", e)))?;
80
81	// Write to temp file first
82	tokio::fs::write(&temp_file, content)
83		.await
84		.map_err(|e| AirError::FileSystem(format!("Failed to write temp index file: {}", e)))?;
85
86	// Atomic rename
87	tokio::fs::rename(&temp_file, &index_file)
88		.await
89		.map_err(|e| AirError::FileSystem(format!("Failed to rename index file: {}", e)))?;
90
91	log::debug!(
92		"[StoreEntry] Index saved to: {} ({} files, {} symbols)",
93		index_file.display(),
94		index.files.len(),
95		index.symbol_index.len()
96	);
97
98	Ok(())
99}
100
101/// Load index from disk with corruption detection
102pub async fn LoadIndex(index_directory:&Path) -> Result<FileIndex> {
103	let index_file = index_directory.join("file_index.json");
104
105	if !index_file.exists() {
106		return Err(AirError::FileSystem(format!(
107			"Index file does not exist: {}",
108			index_file.display()
109		)));
110	}
111
112	let content = tokio::fs::read_to_string(&index_file)
113		.await
114		.map_err(|e| AirError::FileSystem(format!("Failed to read index file: {}", e)))?;
115
116	let index:FileIndex = serde_json::from_str(&content)
117		.map_err(|e| AirError::Serialization(format!("Failed to parse index file: {}", e)))?;
118
119	// Verify index structure
120	if index.index_version.is_empty() || index.index_checksum.is_empty() {
121		return Err(AirError::Serialization("Index missing version or checksum".to_string()));
122	}
123
124	// Verify index checksum
125	use crate::Indexing::State::CreateState::CalculateIndexChecksum;
126	let expected_checksum = CalculateIndexChecksum(&index)?;
127	if index.index_checksum != expected_checksum {
128		return Err(AirError::Serialization(format!(
129			"Index checksum mismatch: expected {}, got {}",
130			expected_checksum, index.index_checksum
131		)));
132	}
133
134	Ok(index)
135}
136
137/// Load or create index with corruption detection
138pub async fn LoadOrCreateIndex(index_directory:&Path) -> Result<FileIndex> {
139	let index_file = index_directory.join("file_index.json");
140
141	if index_file.exists() {
142		// Try to load existing index
143		match LoadIndex(index_directory).await {
144			Ok(index) => {
145				log::info!("[StoreEntry] Loaded index with {} files", index.files.len());
146				Ok(index)
147			},
148			Err(e) => {
149				log::warn!(
150					"[StoreEntry] Failed to load index (may be corrupted): {}. Creating new index.",
151					e
152				);
153				// Backup corrupted index
154				BackupCorruptedIndex(index_directory).await?;
155				Ok(CreateNewIndex())
156			},
157		}
158	} else {
159		// Create new index
160		Ok(CreateNewIndex())
161	}
162}
163
164/// Create a new empty index
165fn CreateNewIndex() -> FileIndex {
166	use crate::Indexing::State::CreateState::CreateNewIndex as StateCreateNewIndex;
167	StateCreateNewIndex()
168}
169
170/// Ensure index directory exists with proper error handling
171pub async fn EnsureIndexDirectory(index_directory:&Path) -> Result<()> {
172	tokio::fs::create_dir_all(index_directory).await.map_err(|e| {
173		AirError::Configuration(format!("Failed to create index directory {}: {}", index_directory.display(), e))
174	})?;
175	Ok(())
176}
177
178/// Backup corrupted index before creating new one
179pub async fn BackupCorruptedIndex(index_directory:&Path) -> Result<()> {
180	let index_file = index_directory.join("file_index.json");
181	let backup_file = index_directory.join(format!("file_index.corrupted.{}.json", chrono::Utc::now().timestamp()));
182
183	if !index_file.exists() {
184		return Ok(());
185	}
186
187	// Rename corrupted file to backup
188	tokio::fs::rename(&index_file, &backup_file)
189		.await
190		.map_err(|e| AirError::FileSystem(format!("Failed to backup corrupted index: {}", e)))?;
191
192	log::info!("[StoreEntry] Backed up corrupted index to: {}", backup_file.display());
193
194	Ok(())
195}
196
197/// Load index with automatic recovery on corruption
198pub async fn LoadIndexWithRecovery(index_directory:&Path, max_retries:usize) -> Result<FileIndex> {
199	let mut last_error = None;
200
201	for attempt in 0..max_retries {
202		match LoadOrCreateIndex(index_directory).await {
203			Ok(index) => {
204				if attempt > 0 {
205					log::info!("[StoreEntry] Successfully loaded index after {} attempts", attempt + 1);
206				}
207				return Ok(index);
208			},
209			Err(e) => {
210				last_error = Some(e);
211				log::warn!("[StoreEntry] Load attempt {} failed", attempt + 1);
212
213				// Wait before retry
214				if attempt < max_retries - 1 {
215					tokio::time::sleep(tokio::time::Duration::from_millis(100 * (attempt + 1) as u64)).await;
216				}
217			},
218		}
219	}
220
221	Err(last_error.unwrap_or_else(|| AirError::Internal("Failed to load index after retries".to_string())))
222}
223
/// Build the path of the index file, `file_index.json`, inside
/// `index_directory`.
///
/// This only composes the path; it does not touch the file system.
pub fn GetIndexFilePath(index_directory:&Path) -> PathBuf {
	let mut index_path = index_directory.to_path_buf();
	index_path.push("file_index.json");
	index_path
}
226
227/// Check if index file exists and is readable
228pub async fn IndexFileExists(index_directory:&Path) -> Result<bool> {
229	let index_file = index_directory.join("file_index.json");
230
231	if !index_file.exists() {
232		return Ok(false);
233	}
234
235	// Try to read metadata to verify accessibility
236	match tokio::fs::metadata(&index_file).await {
237		Ok(_) => Ok(true),
238		Err(_) => Ok(false),
239	}
240}
241
242/// Get index file size in bytes
243pub async fn GetIndexFileSize(index_directory:&Path) -> Result<u64> {
244	let index_file = index_directory.join("file_index.json");
245
246	let metadata = tokio::fs::metadata(&index_file)
247		.await
248		.map_err(|e| AirError::FileSystem(format!("Failed to get index file metadata: {}", e)))?;
249
250	Ok(metadata.len())
251}
252
253/// Clean up old backup files
254pub async fn CleanupOldBackups(index_directory:&Path, keep_count:usize) -> Result<usize> {
255	let mut entries = tokio::fs::read_dir(index_directory)
256		.await
257		.map_err(|e| AirError::FileSystem(format!("Failed to read index directory: {}", e)))?;
258
259	let mut backups = Vec::new();
260
261	while let Some(entry) = entries
262		.next_entry()
263		.await
264		.map_err(|e| AirError::FileSystem(format!("Failed to read directory entry: {}", e)))?
265	{
266		let file_name = entry.file_name().to_string_lossy().to_string();
267
268		if file_name.starts_with("file_index.corrupted.") && file_name.ends_with(".json") {
269			if let Ok(metadata) = entry.metadata().await {
270				if let Ok(modified) = metadata.modified() {
271					backups.push((entry.path(), modified));
272				}
273			}
274		}
275	}
276
277	// Sort by modified time (oldest first)
278	backups.sort_by_key(|b| b.1);
279
280	let mut removed_count = 0;
281
282	// Remove old backups beyond keep_count
283	for (path, _) in backups.iter().take(backups.len().saturating_sub(keep_count)) {
284		match tokio::fs::remove_file(path).await {
285			Ok(_) => {
286				log::info!("[StoreEntry] Removed old backup: {}", path.display());
287				removed_count += 1;
288			},
289			Err(e) => {
290				log::warn!("[StoreEntry] Failed to remove backup {}: {}", path.display(), e);
291			},
292		}
293	}
294
295	Ok(removed_count)
296}
297
298/// Validate index file format before loading
299pub async fn ValidateIndexFormat(index_directory:&Path) -> Result<()> {
300	let index_file = index_directory.join("file_index.json");
301
302	let content = tokio::fs::read_to_string(&index_file)
303		.await
304		.map_err(|e| AirError::FileSystem(format!("Failed to read index file: {}", e)))?;
305
306	// Try to parse as JSON
307	let _:serde_json::Value = serde_json::from_str(&content)
308		.map_err(|e| AirError::Serialization(format!("Index file is not valid JSON: {}", e)))?;
309
310	Ok(())
311}