Support Unicode case-insensitive matching

This commit is contained in:
pontus.horn
2019-08-17 23:45:25 +02:00
parent 88719e5b3d
commit 76e9bab587

View File

@@ -125,7 +125,7 @@ class PicoSearch extends AbstractPicoPlugin
public function getSearchRankForPage($page) { public function getSearchRankForPage($page) {
// If there's an exact match in the title, skip a bunch of work and give it a very high score // If there's an exact match in the title, skip a bunch of work and give it a very high score
$escaped_search_terms = preg_quote($this->search_terms, '/'); $escaped_search_terms = preg_quote($this->search_terms, '/');
if (preg_match("/\b$escaped_search_terms\b/i", $page['title']) === 1) { if (preg_match("/\b$escaped_search_terms\b/iu", $page['title']) === 1) {
return 5; return 5;
} }
@@ -154,17 +154,17 @@ class PicoSearch extends AbstractPicoPlugin
$searchTermValue = $this->isLowValueWord($searchTerm) ? 0.2 : 1; $searchTermValue = $this->isLowValueWord($searchTerm) ? 0.2 : 1;
$escapedSearchTerm = preg_quote($searchTerm, '/'); $escapedSearchTerm = preg_quote($searchTerm, '/');
$fullWordMatches = preg_match_all("/\b$escapedSearchTerm\b/i", $content); $fullWordMatches = preg_match_all("/\b$escapedSearchTerm\b/iu", $content);
if ($fullWordMatches > 0) { if ($fullWordMatches > 0) {
return min($fullWordMatches, 3) * $searchTermValue; return min($fullWordMatches, 3) * $searchTermValue;
} }
$startOfWordMatches = preg_match_all("/\b$escapedSearchTerm\B/i", $content); $startOfWordMatches = preg_match_all("/\b$escapedSearchTerm\B/iu", $content);
if ($startOfWordMatches > 0) { if ($startOfWordMatches > 0) {
return min($startOfWordMatches, 3) * 0.5 * $searchTermValue; return min($startOfWordMatches, 3) * 0.5 * $searchTermValue;
} }
$inWordMatches = preg_match_all("/\B$escapedSearchTerm\B/i", $content); $inWordMatches = preg_match_all("/\B$escapedSearchTerm\B/iu", $content);
return min($inWordMatches, 3) * 0.05 * $searchTermValue; return min($inWordMatches, 3) * 0.05 * $searchTermValue;
} }