1use std::path::PathBuf;
67
68use regex::Regex;
69
70use crate::{AirError, Result};
71use crate::Indexing::State::CreateState::{FileIndex, FileMetadata};
73
74pub const MAX_SEARCH_RESULTS_DEFAULT:u32 = 100;
76
77#[derive(Debug, Clone)]
79pub struct SearchQuery {
80 pub query:String,
82 pub mode:SearchMode,
84 pub case_sensitive:bool,
86 pub whole_word:bool,
88 pub regex:Option<Regex>,
90 pub max_results:u32,
92 pub page:u32,
94}
95
96#[derive(Debug, Clone, PartialEq)]
98pub enum SearchMode {
99 Literal,
101 Regex,
103 Fuzzy,
105 Exact,
107}
108
109#[derive(Debug, Clone)]
111pub struct SearchResult {
112 pub path:String,
114 pub file_name:String,
116 pub matches:Vec<SearchMatch>,
118 pub relevance:f64,
120 pub language:Option<String>,
122}
123
124#[derive(Debug, Clone)]
126pub struct SearchMatch {
127 pub line_number:u32,
129 pub line_content:String,
131 pub match_start:usize,
133 pub match_end:usize,
135 pub context_before:Vec<String>,
137 pub context_after:Vec<String>,
139}
140
141#[derive(Debug, Clone)]
143pub struct PaginatedSearchResults {
144 pub results:Vec<SearchResult>,
146 pub total_count:u32,
148 pub page:u32,
150 pub total_pages:u32,
152 pub page_size:u32,
154}
155
156impl IntoIterator for PaginatedSearchResults {
157 type Item = SearchResult;
158 type IntoIter = std::vec::IntoIter<SearchResult>;
159
160 fn into_iter(self) -> Self::IntoIter { self.results.into_iter() }
161}
162
163impl<'a> IntoIterator for &'a PaginatedSearchResults {
164 type Item = &'a SearchResult;
165 type IntoIter = std::slice::Iter<'a, SearchResult>;
166
167 fn into_iter(self) -> Self::IntoIter { self.results.iter() }
168}
169
170pub async fn QueryIndexSearch(
182 index:&FileIndex,
183 query:SearchQuery,
184 path_filter:Option<String>,
185 language_filter:Option<String>,
186) -> Result<PaginatedSearchResults> {
187 log::info!("[QueryIndex] Searching for: '{}' (mode: {:?})", query.query, query.mode);
188
189 let sanitized_query = SanitizeSearchQuery(&query.query)?;
191
192 let case_sensitive = query.case_sensitive;
194 let whole_word = query.whole_word;
195 let max_results = if query.max_results == 0 {
196 MAX_SEARCH_RESULTS_DEFAULT
197 } else {
198 query.max_results.min(1000) };
200
201 let mut all_results = Vec::new();
202
203 match query.mode {
205 SearchMode::Literal => {
206 QueryIndexLiteral(
207 &sanitized_query,
208 case_sensitive,
209 whole_word,
210 path_filter.as_deref(),
211 language_filter.as_deref(),
212 index,
213 &mut all_results,
214 )
215 .await;
216 },
217 SearchMode::Regex => {
218 if let Some(regex) = &query.regex {
219 QueryIndexRegex(
220 regex,
221 path_filter.as_deref(),
222 language_filter.as_deref(),
223 index,
224 &mut all_results,
225 )
226 .await;
227 } else {
228 if let Ok(regex) = Regex::new(&sanitized_query) {
230 QueryIndexRegex(
231 ®ex,
232 path_filter.as_deref(),
233 language_filter.as_deref(),
234 index,
235 &mut all_results,
236 )
237 .await;
238 }
239 }
240 },
241 SearchMode::Fuzzy => {
242 QueryIndexFuzzy(
243 &sanitized_query,
244 case_sensitive,
245 path_filter.as_deref(),
246 language_filter.as_deref(),
247 index,
248 &mut all_results,
249 )
250 .await;
251 },
252 SearchMode::Exact => {
253 QueryIndexExact(
254 &sanitized_query,
255 case_sensitive,
256 path_filter.as_deref(),
257 language_filter.as_deref(),
258 index,
259 &mut all_results,
260 )
261 .await;
262 },
263 }
264
265 all_results.sort_by(|a, b| b.relevance.partial_cmp(&a.relevance).unwrap());
267
268 let total_count = all_results.len() as u32;
270 let total_pages = if max_results == 0 { 0 } else { total_count.div_ceil(max_results) };
271 let page = query.page.min(total_pages.saturating_sub(1));
272
273 let start = (page * max_results) as usize;
275 let end = ((page + 1) * max_results).min(total_count) as usize;
276 let page_results = all_results[start..end].to_vec();
277
278 log::info!(
279 "[QueryIndex] Search completed: {} total results, page {} of {}",
280 total_count,
281 page + 1,
282 total_pages
283 );
284
285 Ok(PaginatedSearchResults { results:page_results, total_count, page, total_pages, page_size:max_results })
286}
287
288pub fn SanitizeSearchQuery(query:&str) -> Result<String> {
290 let sanitized:String = query.chars().filter(|c| *c != '\0' && !c.is_control()).collect();
292
293 if sanitized.len() > 1000 {
295 return Err(AirError::Validation(
296 "Search query exceeds maximum length of 1000 characters".to_string(),
297 ));
298 }
299
300 Ok(sanitized)
301}
302
303async fn QueryIndexLiteral(
305 query:&str,
306 case_sensitive:bool,
307 whole_word:bool,
308 path_filter:Option<&str>,
309 language_filter:Option<&str>,
310 index:&FileIndex,
311 results:&mut Vec<SearchResult>,
312) {
313 let search_query = if case_sensitive { query.to_string() } else { query.to_lowercase() };
314
315 if let Some(file_paths) = index.content_index.get(&search_query.to_lowercase()) {
317 for file_path in file_paths {
318 if let Some(metadata) = index.files.get(file_path) {
319 if MatchesFilters(file_path, metadata, path_filter, language_filter) {
320 if let Ok(search_result) =
321 FindMatchesInFile(file_path, &search_query, case_sensitive, whole_word, index).await
322 {
323 if !search_result.matches.is_empty() {
324 results.push(search_result);
325 }
326 }
327 }
328 }
329 }
330 }
331
332 for (file_path, metadata) in &index.files {
334 if results.len() >= 1000 {
335 break;
336 }
337
338 let file_name = file_path.file_name().unwrap_or_default().to_string_lossy().to_string();
339
340 let name_to_search = if case_sensitive { file_name.clone() } else { file_name.to_lowercase() };
341
342 if name_to_search.contains(&search_query) {
343 if MatchesFilters(file_path, metadata, path_filter, language_filter) {
344 results.push(SearchResult {
346 path:file_path.to_string_lossy().to_string(),
347 file_name,
348 matches:Vec::new(),
349 relevance:0.3,
350 language:metadata.language.clone(),
351 });
352 }
353 }
354 }
355}
356
357async fn QueryIndexRegex(
359 regex:&Regex,
360 path_filter:Option<&str>,
361 language_filter:Option<&str>,
362 index:&FileIndex,
363 results:&mut Vec<SearchResult>,
364) {
365 for (file_path, metadata) in &index.files {
366 if results.len() >= 1000 {
367 break;
368 }
369
370 if !MatchesFilters(file_path, metadata, path_filter, language_filter) {
371 continue;
372 }
373
374 if let Ok(content) = tokio::fs::read_to_string(file_path).await {
375 let matches = FindRegexMatches(&content, regex);
376
377 if !matches.is_empty() {
378 let relevance = CalculateRelevance(&matches, metadata);
379
380 results.push(SearchResult {
381 path:file_path.to_string_lossy().to_string(),
382 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
383 matches,
384 relevance,
385 language:metadata.language.clone(),
386 });
387 }
388 }
389 }
390}
391
392async fn QueryIndexFuzzy(
394 query:&str,
395 case_sensitive:bool,
396 path_filter:Option<&str>,
397 language_filter:Option<&str>,
398 index:&FileIndex,
399 results:&mut Vec<SearchResult>,
400) {
401 let query_lower = query.to_lowercase();
402
403 for (file_path, metadata) in &index.files {
404 if results.len() >= 1000 {
405 break;
406 }
407
408 if !MatchesFilters(file_path, metadata, path_filter, language_filter) {
409 continue;
410 }
411
412 if let Ok(content) = tokio::fs::read_to_string(file_path).await {
413 let max_distance = 2; let matches = FindFuzzyMatches(&content, &query_lower, case_sensitive, max_distance);
415
416 if !matches.is_empty() {
417 let relevance = CalculateRelevance(&matches, metadata) * 0.8; results.push(SearchResult {
420 path:file_path.to_string_lossy().to_string(),
421 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
422 matches,
423 relevance,
424 language:metadata.language.clone(),
425 });
426 }
427 }
428 }
429}
430
431async fn QueryIndexExact(
433 query:&str,
434 _case_sensitive:bool,
435 path_filter:Option<&str>,
436 language_filter:Option<&str>,
437 index:&FileIndex,
438 results:&mut Vec<SearchResult>,
439) {
440 for (file_path, metadata) in &index.files {
441 if results.len() >= 1000 {
442 break;
443 }
444
445 if !MatchesFilters(file_path, metadata, path_filter, language_filter) {
446 continue;
447 }
448
449 if let Ok(content) = tokio::fs::read_to_string(file_path).await {
450 let matches = FindExactMatches(&content, query);
451
452 if !matches.is_empty() {
453 let relevance = CalculateRelevance(&matches, metadata) * 1.1; results.push(SearchResult {
456 path:file_path.to_string_lossy().to_string(),
457 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
458 matches,
459 relevance,
460 language:metadata.language.clone(),
461 });
462 }
463 }
464 }
465}
466
467async fn FindMatchesInFile(
469 file_path:&PathBuf,
470 query:&str,
471 case_sensitive:bool,
472 whole_word:bool,
473 index:&FileIndex,
474) -> Result<SearchResult> {
475 let content = tokio::fs::read_to_string(file_path)
476 .await
477 .map_err(|e| AirError::FileSystem(format!("Failed to read file: {}", e)))?;
478
479 let metadata = index
480 .files
481 .get(file_path)
482 .ok_or_else(|| AirError::Internal("File metadata not found in index".to_string()))?;
483
484 let matches = FindMatchesWithContext(&content, query, case_sensitive, whole_word);
485 let relevance = CalculateRelevance(&matches, metadata);
486
487 Ok(SearchResult {
488 path:file_path.to_string_lossy().to_string(),
489 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
490 matches,
491 relevance,
492 language:metadata.language.clone(),
493 })
494}
495
496fn FindMatchesWithContext(content:&str, query:&str, case_sensitive:bool, whole_word:bool) -> Vec<SearchMatch> {
498 let mut matches = Vec::new();
499 let lines:Vec<&str> = content.lines().collect();
500
501 let search_in = |line:&str| -> Option<(usize, usize)> {
502 let line_to_search = if case_sensitive { line.to_string() } else { line.to_lowercase() };
503
504 let query_to_find = if case_sensitive { query.to_string() } else { query.to_lowercase() };
505
506 let start = if whole_word {
507 FindWholeWordMatch(&line_to_search, &query_to_find)
508 } else {
509 line_to_search.find(&query_to_find)
510 };
511
512 start.map(|s| (s, s + query.len()))
513 };
514
515 for (line_idx, line) in lines.iter().enumerate() {
516 let line_number = line_idx as u32 + 1;
517
518 if let Some((match_start, match_end)) = search_in(line) {
519 let context_start = line_idx.saturating_sub(2);
521 let context_end = (line_idx + 3).min(lines.len());
522
523 let context_before = lines[context_start..line_idx].iter().map(|s| s.to_string()).collect();
524
525 let context_after = lines[line_idx + 1..context_end].iter().map(|s| s.to_string()).collect();
526
527 matches.push(SearchMatch {
528 line_number,
529 line_content:line.to_string(),
530 match_start,
531 match_end,
532 context_before,
533 context_after,
534 });
535 }
536 }
537
538 matches
539}
540
541fn FindWholeWordMatch(line:&str, word:&str) -> Option<usize> {
543 let mut start = 0;
544
545 while let Some(pos) = line[start..].find(word) {
546 let actual_pos = start + pos;
547
548 let valid_before = actual_pos == 0
550 || line
551 .chars()
552 .nth(actual_pos - 1)
553 .map_or(true, |c| !c.is_alphanumeric() && c != '_');
554
555 let match_end = actual_pos + word.len();
557 let valid_after =
558 match_end == line.len() || line.chars().nth(match_end).map_or(true, |c| !c.is_alphanumeric() && c != '_');
559
560 if valid_before && valid_after {
561 return Some(actual_pos);
562 }
563
564 start = actual_pos + 1;
565 }
566
567 None
568}
569
570fn FindRegexMatches(content:&str, regex:&Regex) -> Vec<SearchMatch> {
572 let mut matches = Vec::new();
573 let lines:Vec<&str> = content.lines().collect();
574
575 for (line_idx, line) in lines.iter().enumerate() {
576 let line_number = line_idx as u32 + 1;
577
578 for mat in regex.find_iter(line) {
579 matches.push(SearchMatch {
580 line_number,
581 line_content:line.to_string(),
582 match_start:mat.start(),
583 match_end:mat.end(),
584 context_before:Vec::new(),
585 context_after:Vec::new(),
586 });
587 }
588 }
589
590 matches
591}
592
593fn FindFuzzyMatches(content:&str, query:&str, case_sensitive:bool, max_distance:usize) -> Vec<SearchMatch> {
595 let mut matches = Vec::new();
596 let lines:Vec<&str> = content.lines().collect();
597
598 for (line_idx, line) in lines.iter().enumerate() {
599 let line_number = line_idx as u32 + 1;
600 let line_to_search = if case_sensitive { line.to_string() } else { line.to_lowercase() };
601
602 if let Some(pos) = line_to_search.find(query) {
604 let distance = CalculateLevenshteinDistance(&line_to_search[pos..pos.saturating_add(query.len())], query);
606
607 if distance <= max_distance {
608 matches.push(SearchMatch {
609 line_number,
610 line_content:line.to_string(),
611 match_start:pos,
612 match_end:pos + query.len(),
613 context_before:Vec::new(),
614 context_after:Vec::new(),
615 });
616 }
617 }
618 }
619
620 matches
621}
622
623fn FindExactMatches(content:&str, query:&str) -> Vec<SearchMatch> { FindMatchesWithContext(content, query, true, true) }
625
626fn CalculateLevenshteinDistance(s1:&str, s2:&str) -> usize {
628 let s1_chars:Vec<char> = s1.chars().collect();
629 let s2_chars:Vec<char> = s2.chars().collect();
630 let len1 = s1_chars.len();
631 let len2 = s2_chars.len();
632
633 let mut dp = vec![vec![0usize; len2 + 1]; len1 + 1];
635
636 for i in 0..=len1 {
638 dp[i][0] = i;
639 }
640 for j in 0..=len2 {
641 dp[0][j] = j;
642 }
643
644 for i in 1..=len1 {
646 for j in 1..=len2 {
647 if s1_chars[i - 1] == s2_chars[j - 1] {
648 dp[i][j] = dp[i - 1][j - 1];
649 } else {
650 dp[i][j] = 1 + [
651 dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1], ]
655 .into_iter()
656 .min()
657 .unwrap();
658 }
659 }
660 }
661
662 dp[len1][len2]
663}
664
665fn CalculateRelevance(matches:&[SearchMatch], metadata:&FileMetadata) -> f64 {
667 let match_count = matches.len();
668 let line_count = metadata.line_count.unwrap_or(1) as f64;
669
670 let mut relevance = (match_count as f64 / line_count) * 10.0;
672
673 if match_count > 0 {
675 relevance += (match_count as f64).log10() * 0.5;
676 }
677
678 let days_old = (chrono::Utc::now() - metadata.modified).num_days() as f64;
680 relevance += 1.0 / (days_old + 1.0).max(1.0);
681
682 relevance.min(10.0).max(0.0)
683}
684
685pub fn MatchesFilters(
687 file_path:&PathBuf,
688 metadata:&FileMetadata,
689 path_filter:Option<&str>,
690 language_filter:Option<&str>,
691) -> bool {
692 if let Some(search_path) = path_filter {
694 if !file_path.to_string_lossy().contains(search_path) {
695 return false;
696 }
697 }
698
699 if let Some(lang) = language_filter {
701 if metadata.language.as_deref() != Some(lang) {
702 return false;
703 }
704 }
705
706 true
707}