From baa90d815e4d3747763c0e1a9367f2ac2772a71a Mon Sep 17 00:00:00 2001 From: Talha Mansoor Date: Sun, 10 Nov 2013 14:49:49 +0500 Subject: [PATCH] Minify JSON to improve search speed --- tipue_search/tipue_search.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tipue_search/tipue_search.py b/tipue_search/tipue_search.py index 2959076..83ad735 100644 --- a/tipue_search/tipue_search.py +++ b/tipue_search/tipue_search.py @@ -33,10 +33,12 @@ class Tipue_Search_JSON_Generator(object): if getattr(page, 'status', 'published') != 'published': return - page_title = page.title + soup_title = BeautifulSoup(page.title.replace(' ', ' ')) + page_title = soup_title.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'") - soup = BeautifulSoup(page.content, 'html.parser') - page_text = soup.get_text() + soup_text = BeautifulSoup(page.content) + page_text = soup_text.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('¶', ' ') + page_text = ' '.join(page_text.split()) if getattr(page, 'category') == 'None': page_category = '' @@ -65,7 +67,7 @@ class Tipue_Search_JSON_Generator(object): root_node = {'pages': self.json_nodes} with open(path, 'w', encoding='utf-8') as fd: - json.dump(root_node, fd, indent=4) + json.dump(root_node, fd, separators=(',', ':')) def get_generators(generators):