diff --git a/post_stats/post_stats.py b/post_stats/post_stats.py index 6b38dd3..3f7c81e 100644 --- a/post_stats/post_stats.py +++ b/post_stats/post_stats.py @@ -3,11 +3,13 @@ Post Statistics ======================== -This plugin calculates various Statistics about a post and stores them in an article.stats disctionary: +This plugin calculates various statistics about a post and stores them in an article.stats dictionary: wc: how many words read_mins: how many minutes to read this article, based on 250 wpm (http://en.wikipedia.org/wiki/Words_per_minute#Reading_and_comprehension) word_counts: frquency count of all the words in the article; can be used for tag/word clouds/ +fi: Flesch-Kincaid Index / Reading Ease +fk: Flesch-Kincaid Grade Level """ @@ -16,6 +18,8 @@ from bs4 import BeautifulSoup import re from collections import Counter +from .readability import * + def calculate_stats(instance): @@ -26,18 +30,22 @@ def calculate_stats(instance): # How fast do average people read? WPM = 250 - # Pre-process the text to remove entities - entities = r'\&\#?.+?;' - content = content.replace('&nbsp;', ' ') - content = re.sub(entities, '', content) - - # Pre-process the text to remove punctuation - drop = u'.,?!@#$%^&*()_+-=\|/[]{}`~:;\'\"‘’—…“”' - content = content.translate(dict((ord(c), u'') for c in drop)) - # Use BeautifulSoup to get readable/visible text raw_text = BeautifulSoup(content).getText() + # Process the text to remove entities + entities = r'\&\#?.+?;' + raw_text = raw_text.replace('&nbsp;', ' ') + raw_text = re.sub(entities, '', raw_text) + + # Flesch-Kincaid readability stats count sentences, + # so save before removing punctuation + tmp = raw_text + + # Process the text to remove punctuation + drop = u'.,?!@#$%^&*()_+-=\|/[]{}`~:;\'\"‘’—…“”' + raw_text = raw_text.translate(dict((ord(c), u'') for c in drop)) + # Count the words in the text words = raw_text.lower().split() word_count = Counter(words) @@ -45,11 +53,17 @@ def calculate_stats(instance): # Return the 
stats stats['word_counts'] = word_count stats['wc'] = sum(word_count.values()) + # Calulate how long it'll take to read, rounding up stats['read_mins'] = (stats['wc'] + WPM - 1) // WPM if stats['read_mins'] == 0: stats['read_mins'] = 1 + # Calculate Flesch-Kincaid readability stats + readability_stats = stcs, words, sbls = text_stats(tmp, stats['wc']) + stats['fi'] = "{:.2f}".format(flesch_index(readability_stats)) + stats['fk'] = "{:.2f}".format(flesch_kincaid_level(readability_stats)) + instance.stats = stats diff --git a/post_stats/readability.py b/post_stats/readability.py new file mode 100644 index 0000000..0cc1c71 --- /dev/null +++ b/post_stats/readability.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- + +# Adapted from here: http://acdx.net/calculating-the-flesch-kincaid-level-in-python/ +# See here for details: http://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_test + +from __future__ import division +import re + + +def mean(seq): + return sum(seq) / len(seq) + + +def syllables(word): + if len(word) <= 3: + return 1 + + word = re.sub(r"(es|ed|(?= 2, stcs) + + if wc: + words = wc + else: + words = sum(len(s) for s in stcs) + + sbls = sum(syllables(w) for s in stcs for w in s) + + return len(stcs), words, sbls + + +def flesch_index(stats): + stcs, words, sbls = stats + if stcs == 0 or words == 0: + return 0 + return 206.835 - 1.015 * (words / stcs) - 84.6 * (sbls / words) + + +def flesch_kincaid_level(stats): + stcs, words, sbls = stats + if stcs == 0 or words == 0: + return 0 + return 0.39 * (words / stcs) + 11.8 * (sbls / words) - 15.59 diff --git a/post_stats/readme.rst b/post_stats/readme.rst new file mode 100644 index 0000000..f607d3b --- /dev/null +++ b/post_stats/readme.rst @@ -0,0 +1,49 @@ +Post Statistics +================== + +A Pelican plugin to calculate various statistics about a post and store them in an article.stats dictionary: + +- ``wc``: how many words +- ``read_mins``: how many minutes to read this article, based on 250 wpm 
(http://en.wikipedia.org/wiki/Words_per_minute#Reading_and_comprehension) +- ``word_counts``: frequency count of all the words in the article; can be used for tag/word clouds +- ``fi``: Flesch-Kincaid Index / Reading Ease (see: http://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests) +- ``fk``: Flesch-Kincaid Grade Level + +Example: + +.. code-block:: python + + { + 'wc': 2760, + 'fi': '65.94', + 'fk': '7.65', + 'word_counts': Counter({u'to': 98, u'a': 90, u'the': 83, u'of': 50, ...}), + 'read_mins': 12 + } + +This allows you to output these values in your templates, like this, for example: + +.. code-block:: html+jinja + +
~{{ article.stats['read_mins'] }} min read
+