Explicitly set the HTML parser to make sure no extra tags get added.
BeautifulSoup supports multiple HTML parsers. Some of those parsers try to make the HTML valid by adding or removing tags [1]. This can lead to superfluous <html>, <head> and <body> tags in the final document. Explicitly setting the parser to 'html.parser' avoids this behaviour. [1] http://www.crummy.com/software/BeautifulSoup/bs4/doc/#differences-between-parsers
This commit is contained in:
@@ -14,7 +14,7 @@ from pelican import signals, readers, contents
|
|||||||
def extract_toc(content):
|
def extract_toc(content):
|
||||||
if isinstance(content, contents.Static):
|
if isinstance(content, contents.Static):
|
||||||
return
|
return
|
||||||
soup = BeautifulSoup(content._content)
|
soup = BeautifulSoup(content._content,'html.parser')
|
||||||
filename = content.source_path
|
filename = content.source_path
|
||||||
extension = path.splitext(filename)[1][1:]
|
extension = path.splitext(filename)[1][1:]
|
||||||
toc = ''
|
toc = ''
|
||||||
|
|||||||
Reference in New Issue
Block a user