From 164247d2e35cdb3ebb7e47d1433de4148b6a502d Mon Sep 17 00:00:00 2001 From: m-r-r Date: Wed, 27 Mar 2013 22:56:14 -0400 Subject: [PATCH] Import the sitemap plugin from getpelican/pelican@024ecb30 --- pelicanext/sitemap/__init__.py | 0 pelicanext/sitemap/sitemap.py | 195 +++++++++++++++++++++++++++++++++ 2 files changed, 195 insertions(+) create mode 100644 pelicanext/sitemap/__init__.py create mode 100644 pelicanext/sitemap/sitemap.py diff --git a/pelicanext/sitemap/__init__.py b/pelicanext/sitemap/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pelicanext/sitemap/sitemap.py b/pelicanext/sitemap/sitemap.py new file mode 100644 index 0000000..8043baa --- /dev/null +++ b/pelicanext/sitemap/sitemap.py @@ -0,0 +1,195 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import collections +import os.path + +from datetime import datetime +from logging import warning, info +from codecs import open + +from pelican import signals, contents + +TXT_HEADER = """{0}/index.html +{0}/archives.html +{0}/tags.html +{0}/categories.html +""" + +XML_HEADER = """ + +""" + +XML_URL = """ + +{0}/{1} +{2} +{3} +{4} + +""" + +XML_FOOTER = """ + +""" + + +def format_date(date): + if date.tzinfo: + tz = date.strftime('%s') + tz = tz[:-2] + ':' + tz[-2:] + else: + tz = "-00:00" + return date.strftime("%Y-%m-%dT%H:%M:%S") + tz + + +class SitemapGenerator(object): + + def __init__(self, context, settings, path, theme, output_path, *null): + + self.output_path = output_path + self.context = context + self.now = datetime.now() + self.siteurl = settings.get('SITEURL') + + self.format = 'xml' + + self.changefreqs = { + 'articles': 'monthly', + 'indexes': 'daily', + 'pages': 'monthly' + } + + self.priorities = { + 'articles': 0.5, + 'indexes': 0.5, + 'pages': 0.5 + } + + config = settings.get('SITEMAP', {}) + + if not isinstance(config, dict): + warning("sitemap plugin: the SITEMAP setting must be a dict") + else: + fmt = config.get('format') + pris = config.get('priorities') + chfreqs = config.get('changefreqs') + + if fmt not in ('xml', 'txt'): + warning("sitemap plugin: SITEMAP['format'] must be `txt' or `xml'") + warning("sitemap plugin: Setting SITEMAP['format'] on `xml'") + elif fmt == 'txt': + self.format = fmt + return + + valid_keys = ('articles', 'indexes', 'pages') + valid_chfreqs = ('always', 'hourly', 'daily', 'weekly', 'monthly', + 'yearly', 'never') + + if isinstance(pris, dict): + # We use items for Py3k compat. .iteritems() otherwise + for k, v in pris.items(): + if k in valid_keys and not isinstance(v, (int, float)): + default = self.priorities[k] + warning("sitemap plugin: priorities must be numbers") + warning("sitemap plugin: setting SITEMAP['priorities']" + "['{0}'] on {1}".format(k, default)) + pris[k] = default + self.priorities.update(pris) + elif pris is not None: + warning("sitemap plugin: SITEMAP['priorities'] must be a dict") + warning("sitemap plugin: using the default values") + + if isinstance(chfreqs, dict): + # .items() for py3k compat. + for k, v in chfreqs.items(): + if k in valid_keys and v not in valid_chfreqs: + default = self.changefreqs[k] + warning("sitemap plugin: invalid changefreq `{0}'".format(v)) + warning("sitemap plugin: setting SITEMAP['changefreqs']" + "['{0}'] on '{1}'".format(k, default)) + chfreqs[k] = default + self.changefreqs.update(chfreqs) + elif chfreqs is not None: + warning("sitemap plugin: SITEMAP['changefreqs'] must be a dict") + warning("sitemap plugin: using the default values") + + + + def write_url(self, page, fd): + + if getattr(page, 'status', 'published') != 'published': + return + + page_path = os.path.join(self.output_path, page.url) + if not os.path.exists(page_path): + return + + lastmod = format_date(getattr(page, 'date', self.now)) + + if isinstance(page, contents.Article): + pri = self.priorities['articles'] + chfreq = self.changefreqs['articles'] + elif isinstance(page, contents.Page): + pri = self.priorities['pages'] + chfreq = self.changefreqs['pages'] + else: + pri = self.priorities['indexes'] + chfreq = self.changefreqs['indexes'] + + + if self.format == 'xml': + fd.write(XML_URL.format(self.siteurl, page.url, lastmod, chfreq, pri)) + else: + fd.write(self.siteurl + '/' + loc + '\n') + + + def generate_output(self, writer): + path = os.path.join(self.output_path, 'sitemap.{0}'.format(self.format)) + + pages = self.context['pages'] + self.context['articles'] \ + + [ c for (c, a) in self.context['categories']] \ + + [ t for (t, a) in self.context['tags']] \ + + [ a for (a, b) in self.context['authors']] + + for article in self.context['articles']: + pages += article.translations + + info('writing {0}'.format(path)) + + with open(path, 'w', encoding='utf-8') as fd: + + if self.format == 'xml': + fd.write(XML_HEADER) + else: + fd.write(TXT_HEADER.format(self.siteurl)) + + FakePage = collections.namedtuple('FakePage', + ['status', + 'date', + 'url']) + + for standard_page_url in ['index.html', + 'archives.html', + 'tags.html', + 'categories.html']: + fake = FakePage(status='published', + date=self.now, + url=standard_page_url) + self.write_url(fake, fd) + + for page in pages: + self.write_url(page, fd) + + if self.format == 'xml': + fd.write(XML_FOOTER) + + +def get_generators(generators): + return SitemapGenerator + + +def register(): + signals.get_generators.connect(get_generators)