From 8d0e643637d349cc5407b3b12b796f0331398444 Mon Sep 17 00:00:00 2001
From: bas smit <bas@baslab.org>
Date: Fri, 24 May 2013 11:12:51 +0200
Subject: [PATCH] Explicitly set the html parser to make sure no extra tags get
 added.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BeautifulSoup supports multiple HTML parsers. Some of those parsers
try to make the HTML valid by adding/removing tags[1]. This can leave
unwanted <html>, <head> and <body> tags in the final document. Explicitly
setting the parser to 'html.parser' avoids this behaviour.

[1] http://www.crummy.com/software/BeautifulSoup/bs4/doc/#differences-between-parsers
---
 extract_toc/extract_toc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extract_toc/extract_toc.py b/extract_toc/extract_toc.py
index 479970b..a3a7e00 100644
--- a/extract_toc/extract_toc.py
+++ b/extract_toc/extract_toc.py
@@ -14,7 +14,7 @@ from pelican import signals, readers, contents
 def extract_toc(content):
     if isinstance(content, contents.Static):
         return
-    soup = BeautifulSoup(content._content)
+    soup = BeautifulSoup(content._content,'html.parser')
     filename = content.source_path
     extension = path.splitext(filename)[1][1:]
     toc = ''