diff --git a/post_stats/__init__.py b/post_stats/__init__.py new file mode 100644 index 0000000..2313bd5 --- /dev/null +++ b/post_stats/__init__.py @@ -0,0 +1 @@ +from .post_stats import * diff --git a/post_stats/post_stats.py b/post_stats/post_stats.py new file mode 100644 index 0000000..3f7c81e --- /dev/null +++ b/post_stats/post_stats.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +""" +Post Statistics +======================== + +This plugin calculates various statistics about a post and stores them in an article.stats dictionary: + +wc: how many words +read_mins: how many minutes to read this article, based on 250 wpm (http://en.wikipedia.org/wiki/Words_per_minute#Reading_and_comprehension) +word_counts: frquency count of all the words in the article; can be used for tag/word clouds/ +fi: Flesch-kincaid Index/ Reading Ease +fk: Flesch-kincaid Grade Level + +""" + +from pelican import signals +from bs4 import BeautifulSoup +import re +from collections import Counter + +from .readability import * + + +def calculate_stats(instance): + + if instance._content is not None: + stats = {} + content = instance._content + + # How fast do average people read? 
+ WPM = 250 + + # Use BeautifulSoup to get readable/visible text + raw_text = BeautifulSoup(content).getText() + + # Process the text to remove entities + entities = r'\&\#?.+?;' + raw_text = raw_text.replace(' ', ' ') + raw_text = re.sub(entities, '', raw_text) + + # Flesch-kincaid readbility stats counts sentances, + # so save before removing punctuation + tmp = raw_text + + # Process the text to remove punctuation + drop = u'.,?!@#$%^&*()_+-=\|/[]{}`~:;\'\"‘’—…“”' + raw_text = raw_text.translate(dict((ord(c), u'') for c in drop)) + + # Count the words in the text + words = raw_text.lower().split() + word_count = Counter(words) + + # Return the stats + stats['word_counts'] = word_count + stats['wc'] = sum(word_count.values()) + + # Calulate how long it'll take to read, rounding up + stats['read_mins'] = (stats['wc'] + WPM - 1) // WPM + if stats['read_mins'] == 0: + stats['read_mins'] = 1 + + # Calculate Flesch-kincaid readbility stats + readability_stats = stcs, words, sbls = text_stats(tmp, stats['wc']) + stats['fi'] = "{:.2f}".format(flesch_index(readability_stats)) + stats['fk'] = "{:.2f}".format(flesch_kincaid_level(readability_stats)) + + instance.stats = stats + + +def register(): + signals.content_object_init.connect(calculate_stats) diff --git a/post_stats/readability.py b/post_stats/readability.py new file mode 100644 index 0000000..0cc1c71 --- /dev/null +++ b/post_stats/readability.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- + +# Adadpted from here: http://acdx.net/calculating-the-flesch-kincaid-level-in-python/ +# See here for details: http://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_test + +from __future__ import division +import re + + +def mean(seq): + return sum(seq) / len(seq) + + +def syllables(word): + if len(word) <= 3: + return 1 + + word = re.sub(r"(es|ed|(?= 2, stcs) + + if wc: + words = wc + else: + words = sum(len(s) for s in stcs) + + sbls = sum(syllables(w) for s in stcs for w in s) + + return len(stcs), words, sbls + + 
def flesch_index(stats):
    """Return the Flesch Reading Ease score for a text.

    ``stats`` is a 3-item sequence ``(sentences, words, syllables)``.
    Returns ``0`` when the sentence count or the word count is zero;
    otherwise ``206.835 - 1.015 * words/sentences - 84.6 * syllables/words``.
    The ratios rely on true division.
    """
    sentence_count, word_count, syllable_count = stats
    if 0 in (sentence_count, word_count):
        return 0
    words_per_sentence = word_count / sentence_count
    syllables_per_word = syllable_count / word_count
    return 206.835 - 1.015 * words_per_sentence - 84.6 * syllables_per_word


def flesch_kincaid_level(stats):
    """Return the Flesch-Kincaid Grade Level for a text.

    ``stats`` is a 3-item sequence ``(sentences, words, syllables)``.
    Returns ``0`` when the sentence count or the word count is zero;
    otherwise ``0.39 * words/sentences + 11.8 * syllables/words - 15.59``.
    The ratios rely on true division.
    """
    sentence_count, word_count, syllable_count = stats
    if 0 in (sentence_count, word_count):
        return 0
    words_per_sentence = word_count / sentence_count
    syllables_per_word = syllable_count / word_count
    return 0.39 * words_per_sentence + 11.8 * syllables_per_word - 15.59


# ---------------------------------------------------------------------------
# post_stats/readme.rst (text of the readme added by the same patch)
# ---------------------------------------------------------------------------
# Post Statistics
# ==================
#
# A Pelican plugin to calculate various statistics about a post and store them in an article.stats dictionary:
#
# - ``wc``: how many words
# - ``read_mins``: how many minutes would it take to read this article, based on 250 wpm (http://en.wikipedia.org/wiki/Words_per_minute#Reading_and_comprehension)
# - ``word_counts``: frequency count of all the words in the article; can be used for tag/word clouds
# - ``fi``: Flesch-kincaid Index/ Reading Ease (see: http://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests)
# - ``fk``: Flesch-kincaid Grade Level
#
# Example:
#
# .. code-block:: python
#
#     {
#         'wc': 2760,
#         'fi': '65.94',
#         'fk': '7.65',
#         'word_counts': Counter({u'to': 98, u'a': 90, u'the': 83, u'of': 50, ...}),
#         'read_mins': 12
#     }
#
# This allows you to output these values in your templates, like this, for example:
#
# .. code-block:: html+jinja
#
#     ~{{ article.stats['read_mins'] }} min read
#
# NOTE(review): the template example above appears to have lost its
# surrounding HTML markup in the source; only the visible text is kept.