diff --git a/w3c_validate/README.md b/w3c_validate/README.md new file mode 100644 index 0000000..eea646a --- /dev/null +++ b/w3c_validate/README.md @@ -0,0 +1,26 @@ +# w3c_validate plugin + +W3C validator (http://validator.w3.org) plugin for generated HTML content. + +After all content is generated, the output folder is traversed for HTML files, +their content is validated by the W3C service, and the results are displayed, for example: + + -> writing /tmp/_output/sitemap.xml + -> Validating: /tmp/_output/archives.html + ERROR: line: 2; col: 52; message: Bad value http://www.w3.org/1999/html for the attribute xmlns (only http://www.w3.org/1999/xhtml permitted here). + -> Validating: /tmp/_output/categories.html + ERROR: line: 2; col: 52; message: Bad value http://www.w3.org/1999/html for the attribute xmlns (only http://www.w3.org/1999/xhtml permitted here). + +## Dependencies + +* py_w3c, https://pypi.python.org/pypi/py_w3c/0.1.0 , which can be installed with pip: + + $ pip install py_w3c + +## TODO + +[ ] - add tests + + + + diff --git a/w3c_validate/__init__.py b/w3c_validate/__init__.py new file mode 100644 index 0000000..f01ad6e --- /dev/null +++ b/w3c_validate/__init__.py @@ -0,0 +1,2 @@ +# -*- coding: utf-8 -*- +from .wc3_validate import * diff --git a/w3c_validate/wc3_validate.py b/w3c_validate/wc3_validate.py new file mode 100644 index 0000000..f4e8c5c --- /dev/null +++ b/w3c_validate/wc3_validate.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +""" +W3C HTML Validator plugin for generated content. 
from pelican import signals
import logging
import os

LOG = logging.getLogger(__name__)

# File extensions (without the leading dot) of generated files to validate.
INCLUDE_TYPES = ['html']


def validate_files(pelican):
    """
    Validate every matching file found under the Pelican output folder.

    The output tree is walked recursively; each file name accepted by
    ``should_validate`` is joined with its directory and passed to
    ``validate``.

    :param pelican: pelican object; ``pelican.settings['OUTPUT_PATH']`` is
        the root folder that gets walked
    """
    for dirpath, _, filenames in os.walk(pelican.settings['OUTPUT_PATH']):
        for name in filenames:
            if should_validate(name):
                validate(os.path.join(dirpath, name))


def validate(filename):
    """
    Validate one file with the W3C validator service and log the results.

    Uses py_w3c: https://bitbucket.org/nmb10/py_w3c/ .
    Validator errors are logged at ERROR level and validator warnings at
    WARNING level, each with its line, column and unescaped message.

    :param filename: the filename to validate
    """
    # ``HTMLParser`` was renamed to ``html.parser`` in Python 3 and its
    # ``unescape`` method was removed in Python 3.9, so prefer the
    # module-level ``html.unescape`` and fall back to the Python 2 parser.
    try:
        from html import unescape
    except ImportError:
        from HTMLParser import HTMLParser
        unescape = HTMLParser().unescape
    from py_w3c.validators.html.validator import HTMLValidator

    vld = HTMLValidator()
    LOG.info("Validating: %s", filename)

    # call the W3C web service
    vld.validate_file(filename)

    # report errors first, then warnings, each through the matching log level
    findings = (
        (LOG.error, vld.errors),
        (LOG.warning, vld.warnings),
    )
    for log, items in findings:
        for item in items:
            # W3C messages contain HTML entities; unescape them for display
            log(u'line: %s; col: %s; message: %s',
                item['line'], item['col'], unescape(item['message']))


def should_validate(filename):
    """Check if the filename is a type of file that should be validated.

    A file qualifies when its name ends with a dot followed by one of the
    extensions in ``INCLUDE_TYPES``. Matching on the dotted suffix keeps
    names that merely end in the same letters (``nothtml``, ``page.xhtml``)
    from being validated.

    :param filename: A file name to check against
    :return: True if the file should be validated, False otherwise
    """
    # Tolerate entries written with or without the leading dot.
    suffixes = tuple('.' + ext.lstrip('.') for ext in INCLUDE_TYPES)
    return filename.endswith(suffixes)


def register():
    """
    Register Pelican signal for validating content after it is generated.
    """
    signals.finalized.connect(validate_files)