From 9a952032f43ee611e2db2ce22457c6b81e2c65ec Mon Sep 17 00:00:00 2001 From: zhouji Date: Mon, 16 Dec 2013 17:06:12 +0800 Subject: [PATCH 1/2] Escape "^" in the json file because it is a special character for Tipue Search. --- tipue_search/tipue_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tipue_search/tipue_search.py b/tipue_search/tipue_search.py index 83ad735..8b9b03a 100644 --- a/tipue_search/tipue_search.py +++ b/tipue_search/tipue_search.py @@ -37,7 +37,7 @@ class Tipue_Search_JSON_Generator(object): page_title = soup_title.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'") soup_text = BeautifulSoup(page.content) - page_text = soup_text.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('¶', ' ') + page_text = soup_text.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('¶', ' ').replace('^', '^') page_text = ' '.join(page_text.split()) if getattr(page, 'category') == 'None': From 1c6fa7893e6d603da77dbc85172b10458bad1ae7 Mon Sep 17 00:00:00 2001 From: zhouji Date: Mon, 16 Dec 2013 17:13:08 +0800 Subject: [PATCH 2/2] Also escape "^" for page title in the tipue json file. 
--- tipue_search/tipue_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tipue_search/tipue_search.py b/tipue_search/tipue_search.py index 8b9b03a..b0c8f7b 100644 --- a/tipue_search/tipue_search.py +++ b/tipue_search/tipue_search.py @@ -34,7 +34,7 @@ class Tipue_Search_JSON_Generator(object): return soup_title = BeautifulSoup(page.title.replace(' ', ' ')) - page_title = soup_title.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'") + page_title = soup_title.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('^', '^') soup_text = BeautifulSoup(page.content) page_text = soup_text.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('¶', ' ').replace('^', '^')