add w3c validator plugin

This commit is contained in:
José Moreira
2013-04-16 22:18:37 +01:00
parent 45b3094247
commit e5de6be179
3 changed files with 97 additions and 0 deletions

26
w3c_validate/README.md Normal file
View File

@@ -0,0 +1,26 @@
# w3c_validate plugin
W3C validator (http://validator.w3.org) plugin for generated HTML content.
After all content is generated, the output folder is traversed for HTML files;
each file is validated by the W3C service and the results are displayed, for example:
-> writing /tmp/_output/sitemap.xml
-> Validating: /tmp/_output/archives.html
ERROR: line: 2; col: 52; message: Bad value http://www.w3.org/1999/html for the attribute xmlns (only http://www.w3.org/1999/xhtml permitted here).
-> Validating: /tmp/_output/categories.html
ERROR: line: 2; col: 52; message: Bad value http://www.w3.org/1999/html for the attribute xmlns (only http://www.w3.org/1999/xhtml permitted here).
## Dependencies
* py_w3c, https://pypi.python.org/pypi/py_w3c/0.1.0 , which can be installed with pip:
$ pip install py_w3c
## TODO
[ ] - add tests

2
w3c_validate/__init__.py Normal file
View File

@@ -0,0 +1,2 @@
# -*- coding: utf-8 -*-
from .wc3_validate import *

View File

@@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
"""
W3C HTML Validator plugin for generated content.
"""
from pelican import signals
import logging
import os
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# File types to validate; should_validate() compares file names against these.
INCLUDE_TYPES = ['html']
def validate_files(pelican):
    """
    Walk the output folder and validate every eligible generated file.

    :param pelican: pelican object; its ``settings['OUTPUT_PATH']`` is the
        folder that gets traversed
    """
    output_root = pelican.settings['OUTPUT_PATH']
    for folder, _subdirs, files in os.walk(output_root):
        for candidate in files:
            # Skip anything that is not one of the file types to validate.
            if not should_validate(candidate):
                continue
            validate(os.path.join(folder, candidate))
def validate(filename):
    """
    Validate one file with the W3C validator service and log the results.

    The file is submitted through py_w3c
    (https://bitbucket.org/nmb10/py_w3c/). Every reported error is logged at
    ERROR level and every reported warning at WARNING level, each with its
    line, column and (HTML-unescaped) message.

    :param filename: the filename to validate
    """
    # Pick an unescape function that exists on the running interpreter:
    # HTMLParser.unescape() was removed in Python 3.9 and the ``HTMLParser``
    # module only exists on Python 2.
    try:
        from html import unescape  # Python 3.4+
    except ImportError:
        try:
            from html.parser import HTMLParser  # Python 3.0 - 3.3
        except ImportError:
            from HTMLParser import HTMLParser  # Python 2
        unescape = HTMLParser().unescape  # for unescaping W3C messages

    # Third-party dependency, imported at call time.
    from py_w3c.validators.html.validator import HTMLValidator

    vld = HTMLValidator()
    LOG.info("Validating: {0}".format(filename))

    # call w3c webservice
    vld.validate_file(filename)

    # display errors and warnings, errors first
    report = u'line: {0}; col: {1}; message: {2}'
    for log, entries in ((LOG.error, vld.errors), (LOG.warning, vld.warnings)):
        for entry in entries:
            log(report.format(entry['line'], entry['col'],
                              unescape(entry['message'])))
def should_validate(filename):
    """Check if the filename is a type of file that should be validated.

    A file qualifies when its name ends with a dot followed by one of the
    types listed in ``INCLUDE_TYPES`` (for example ``index.html``).

    :param filename: A file name to check against
    :returns: True if the file should be validated, False otherwise
    """
    # Require the leading dot so only real extensions match: a bare
    # ``endswith('html')`` also accepts names such as ``nohtml`` or
    # ``page.xhtml``.
    suffixes = tuple('.' + extension for extension in INCLUDE_TYPES)
    # str.endswith accepts a tuple; an empty tuple yields False.
    return filename.endswith(suffixes)
def register():
    """
    Hook ``validate_files`` onto Pelican's ``finalized`` signal.
    """
    finalized_signal = signals.finalized
    finalized_signal.connect(validate_files)