Mountain/Environment/
SearchProvider.rs

1//! # SearchProvider (Environment)
2//!
3//! Implements the `SearchProvider` trait for `MountainEnvironment`, providing
4//! text search capabilities across files and content within the workspace.
5//!
6//! ## RESPONSIBILITIES
7//!
8//! ### 1. Search Execution
9//! - Search for text patterns in files using glob patterns
10//! - Support regular expression search
11//! - Search file contents and/or file names
12//! - Handle large result sets efficiently
13//!
14//! ### 2. Search Results
15//! - Return structured search results with matches
16//! - Include file URI, line number, column, and matching text
17//! - Support paging and result limiting
18//! - Sort results by relevance or file order
19//!
20//! ### 3. Search Configuration
21//! - Respect workspace file exclusion patterns (.gitignore)
22//! - Honor file size limits for search
23//! - Support case-sensitive and whole-word matching
24//! - Handle symbolic links appropriately
25//!
26//! ### 4. Search Cancellation
27//! - Support cancellation of long-running searches
28//! - Clean up resources on cancellation
29//! - Provide progress feedback (optional)
30//!
31//! ## ARCHITECTURAL ROLE
32//!
33//! SearchProvider is the **workspace search engine**:
34//!
35//! ```text
36//! Search Request ──► SearchProvider ──► FileSystem Scan ──► Results
37//! ```
38//!
39//! ### Position in Mountain
40//! - `Environment` module: Search capability provider
41//! - Implements `CommonLibrary::Search::SearchProvider` trait
42//! - Accessible via `Environment.Require<dyn SearchProvider>()`
43//!
44//! ### Search Types Supported
45//! - **Text search**: Find files containing text pattern
46//! - **File search**: Find files by name/glob pattern
47//! - **Replace**: (Future) Search and replace operations
48//! - **Context search**: (Future) Search with surrounding context
49//!
50//! ### Dependencies
51//! - `FileSystemReader`: Read file contents for searching
52//! - `WorkspaceProvider`: Get workspace folders to search
53//! - `Log`: Search progress and errors
54//!
55//! ### Dependents
56//! - Search UI panel: User-initiated searches
57//! - Find/Replace dialogs: In-editor search
58//! - Grep-like command-line operations
59//! - Code navigation (symbol search)
60//!
61//! ## SEARCH PROCESS
62//!
63//! 1. **File Discovery**: Walk workspace directories, respecting exclusions
64//! 2. **File Filtering**: Match filenames against include/exclude patterns
65//! 3. **Content Search**: For each file, search for pattern in content
66//! 4. **Match Collection**: Record matches with position information
67//! 5. **Result Formatting**: Return structured search results
68//!
69//! ## PERFORMANCE CONSIDERATIONS
70//!
71//! - Search is I/O bound; consider async and parallel processing
72//! - Large workspaces may have thousands of files
73//! - Use file size limits to prevent memory exhaustion
74//! - Implement result paging for UI responsiveness
75//! - Consider background search indexing for faster repeated searches
76//!
77//! ## ERROR HANDLING
78//!
79//! - Permission denied: Skip file, log warning
80//! - File not found: Skip file (may have been deleted)
81//! - Encoding errors: Try default encoding, skip on failure
82//! - Search cancelled: Stop immediately, return partial results
83//!
84//! ## VS CODE REFERENCE
85//!
86//! Patterns from VS Code:
87//! - `vs/workbench/contrib/search/browser/searchWidget.ts` - Search UI
88//! - `vs/platform/search/common/search.ts` - Search service API
89//! - `vs/platform/search/common/fileSearch.ts` - File system search
90//!
91//! ## TODO
92//!
93//! - [ ] Implement file content indexing for faster searches
94//! - [ ] Add regular expression support with PCRE or regex engine
95//! - [ ] Support search result paging and streaming
96//! - [ ] Add search cancellation with proper cleanup
97//! - [ ] Implement search result highlighting in UI
98//! - [ ] Support search in compressed/archive files
99//! - [ ] Add search across multiple workspaces
100//! - [ ] Implement search history and persistence
101//! - [ ] Add search filters (by language, by file size, etc.)
102//! - [ ] Support search templates and saved searches
103//! - [ ] Implement search result grouping (by folder, by file)
104//! - [ ] Add search performance metrics and logging
105//! - [ ] Support search result export (to file, clipboard)
106//!
107//! ## MODULE CONTENTS
108//!
//! - `impl SearchProvider for MountainEnvironment`: the trait implementation
//!   (`TextSearch`); this module defines no separate `SearchProvider` struct
110//! - Search execution methods
111//! - File walking and filtering logic
112//! - Match extraction and formatting
113//! - Search cancellation support
114
115// Responsibilities:
116//   - Perform workspace-wide text searches using `grep-searcher` (the `ripgrep` library).
117//   - Respect workspace folders and standard ignore files (`.gitignore`).
118//   - Collect and format search results into a DTO suitable for the frontend.
119//   - Support regex patterns and case-sensitive/insensitive searches.
120//   - Implement word-boundary matching.
121//   - Optimize for performance with parallel file walking.
122//   - Handle large files efficiently with memory-efficient streaming.
123//   - Support incremental search with result pagination.
124//   - Provide search statistics (matches count, files searched).
125//   - Handle search cancellation gracefully.
126//
127// TODOs:
128//   - Implement result pagination for large result sets
129//   - Add search cancellation via CancellationToken
130//   - Support include/exclude file patterns
131//   - Implement context lines for matches (before/after)
132//   - Add file type filtering (e.g., search only in certain extensions)
133//   - Implement replacement/match highlighting in results
134//   - Add search progress reporting
135//   - Support search across multiple workspace folders independently
136//   - Implement search caching for repeated searches
137//   - Add regex capture groups support
138//   - Implement search history and recent searches
139//   - Support search result export
140//   - Add search performance metrics and optimization
141//   - Implement search result deduplication
142//   - Support glob patterns for file matching
143//   - Add search result ranking and sorting
144//   - Implement binary file handling (skip or search)
145//   - Support symbolic link following
146//   - Add max file size limit to avoid memory issues
147//   - Implement search timeout
148//   - Support search in hidden files
149//   - Add line and column number precision
150//   - Implement multi-line regex search
151//
152// Inspired by VSCode's search service which:
153// - Uses ripgrep for high-performance text search
154// - Supports complex regex patterns and modifiers
155// - Provides context lines for matches
156// - Handles large directories efficiently
157// - Supports file and directory exclusions
158// - Provides incremental search results
159// - Handles search cancellation gracefully
160//! # SearchProvider Implementation
161//!
162//! Implements the `SearchProvider` trait using the `grep-searcher` crate, which
163//! is a library for the `ripgrep` search tool.
164//!
165//! ## Search Architecture
166//!
167//! The search implementation uses a multi-threaded approach:
168//!
169//! 1. **Pattern Compilation**: Regex pattern is compiled with modifiers
170//! 2. **Parallel Walking**: Files in workspace are walked in parallel
171//! 3. **Per-File Search**: Each file is searched individually using a sink
172//!    pattern
173//! 4. **Result Aggregation**: Matches are collected in a shared thread-safe
174//!    vector
175//!
176//! ## Search Features
177//!
178//! - **Case Sensitivity**: Controlled by `is_case_sensitive` option
179//! - **Word Matching**: Controlled by `is_word_match` option
180//! - **Regex Support**: Full regex pattern matching via `grep-regex`
181//! - **Ignore Files**: Respects `.gitignore`, `.ignore`, and other ignore files
182//! - **Parallel Search**: Uses `WalkBuilder::build_parallel()` for performance
183//! - **Memory Efficient**: Streams results to avoid loading entire files
184//!
185//! ## Search Result Format
186//!
187//! Each match includes:
188//! - **File URI**: Valid URL pointing to the file
//! - **Line Number**: Line number of the match as reported by `grep-searcher`
//!   (counting starts at 1); `0` is used only when no line number is available
190//! - **Preview**: The matched text line
191//!
192//! Results are grouped by file, with each file containing multiple matches.
193//
194
195use std::{
196	io,
197	path::PathBuf,
198	sync::{Arc, Mutex},
199};
200
201use CommonLibrary::{Error::CommonError::CommonError, Search::SearchProvider::SearchProvider};
202use async_trait::async_trait;
203use grep_regex::RegexMatcherBuilder;
204use grep_searcher::{Searcher, Sink, SinkMatch};
205use ignore::WalkBuilder;
206use log::{info, warn};
207use serde::{Deserialize, Serialize};
208use serde_json::{Value, json};
209
210use super::{MountainEnvironment::MountainEnvironment, Utility};
211
212#[derive(Deserialize, Debug)]
213#[serde(rename_all = "camelCase")]
214struct TextSearchQuery {
215	pattern:String,
216
217	is_case_sensitive:Option<bool>,
218
219	is_word_match:Option<bool>,
220}
221
222#[derive(Serialize, Clone, Debug)]
223#[serde(rename_all = "camelCase")]
224struct TextMatch {
225	preview:String,
226
227	line_number:u64,
228}
229
230#[derive(Serialize, Clone, Debug)]
231#[serde(rename_all = "camelCase")]
232struct FileMatch {
233	// URI
234	resource:String,
235
236	matches:Vec<TextMatch>,
237}
238
239// This Sink is designed to be created for each file. It holds a reference to
240// the central results vector and the path of the file it's searching.
241struct PerFileSink {
242	path:PathBuf,
243
244	results:Arc<Mutex<Vec<FileMatch>>>,
245}
246
247impl Sink for PerFileSink {
248	type Error = io::Error;
249
250	fn matched(&mut self, _Searcher:&Searcher, Mat:&SinkMatch<'_>) -> Result<bool, Self::Error> {
251		let mut ResultsGuard = self
252			.results
253			.lock()
254			.map_err(|Error| io::Error::new(io::ErrorKind::Other, Error.to_string()))?;
255
256		let Preview = String::from_utf8_lossy(Mat.bytes()).to_string();
257
258		let LineNumber = Mat.line_number().unwrap_or(0);
259
260		// Since this sink is per-file, we know `self.path` is correct.
261		let FileURI = url::Url::from_file_path(&self.path)
262			.map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "Could not convert path to URL"))?
263			.to_string();
264
265		// Find the entry for our file, or create it if it's the first match.
266		if let Some(FileMatch) = ResultsGuard.iter_mut().find(|fm| fm.resource == FileURI) {
267			FileMatch.matches.push(TextMatch { preview:Preview, line_number:LineNumber });
268		} else {
269			ResultsGuard.push(FileMatch {
270				resource:FileURI,
271
272				matches:vec![TextMatch { preview:Preview, line_number:LineNumber }],
273			});
274		}
275
276		// Continue searching
277		Ok(true)
278	}
279}
280
281#[async_trait]
282impl SearchProvider for MountainEnvironment {
283	async fn TextSearch(&self, QueryValue:Value, _OptionsValue:Value) -> Result<Value, CommonError> {
284		let Query:TextSearchQuery = serde_json::from_value(QueryValue)?;
285
286		info!("[SearchProvider] Performing text search for: {:?}", Query);
287
288		let mut Builder = RegexMatcherBuilder::new();
289
290		Builder
291			.case_insensitive(!Query.is_case_sensitive.unwrap_or(false))
292			.word(Query.is_word_match.unwrap_or(false));
293
294		let Matcher = Builder.build(&Query.pattern).map_err(|Error| {
295			CommonError::InvalidArgument { ArgumentName:"pattern".into(), Reason:Error.to_string() }
296		})?;
297
298		let AllMatches = Arc::new(Mutex::new(Vec::<FileMatch>::new()));
299
300		let Folders = self
301			.ApplicationState
302			.Workspace
303			.WorkspaceFolders
304			.lock()
305			.map_err(Utility::MapApplicationStateLockErrorToCommonError)?
306			.clone();
307
308		if Folders.is_empty() {
309			warn!("[SearchProvider] No workspace folders to search in.");
310
311			return Ok(json!([]));
312		}
313
314		for Folder in Folders {
315			if let Ok(FolderPath) = Folder.URI.to_file_path() {
316				// Use a parallel walker for better performance.
317				let Walker = WalkBuilder::new(FolderPath).build_parallel();
318
319				// The `search_parallel` method is not available on `Searcher`. We must process
320				// entries from the walker and call `search_path` individually.
321				Walker.run(|| {
322					let mut Searcher = Searcher::new();
323
324					let Matcher = Matcher.clone();
325
326					let AllMatches = AllMatches.clone();
327
328					Box::new(move |EntryResult| {
329						if let Ok(Entry) = EntryResult {
330							if Entry.file_type().map_or(false, |ft| ft.is_file()) {
331								// For each file, create a new sink that knows its path.
332								let Sink = PerFileSink { path:Entry.path().to_path_buf(), results:AllMatches.clone() };
333
334								if let Err(Error) = Searcher.search_path(&Matcher, Entry.path(), Sink) {
335									warn!(
336										"[SearchProvider] Error searching path {}: {}",
337										Entry.path().display(),
338										Error
339									);
340								}
341							}
342						}
343
344						ignore::WalkState::Continue
345					})
346				});
347			}
348		}
349
350		let FinalMatches = AllMatches
351			.lock()
352			.map_err(|Error| CommonError::StateLockPoisoned { Context:Error.to_string() })?
353			.clone();
354
355		Ok(json!(FinalMatches))
356	}
357}