Changeset 199

Show
Ignore:
Timestamp:
08/23/08 17:27:40 (3 months ago)
Author:
rgrp
Message:

[shakespeare/stats][m]: substantial improvements to stats in both core and WUI.

  • Add word stats support in both core and in WUI
  • Improve tests to catch bug with existing text stats
  • Other WUI improvements e.g. to deal with non-existent texts, and provide an index page
  • TODO: word index page
Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/shakespeare/controllers/stats.py

    Revision 193 Revision 199
    1import logging 1import logging 
    2 2 
    3import pygooglechart 3import pygooglechart 
    4 4 
    5from shakespeare.lib.base import * 5from shakespeare.lib.base import * 
    6log = logging.getLogger(__name__) 6log = logging.getLogger(__name__) 
    7import shakespeare.stats 7import shakespeare.stats 
    8 8 
    9class StatsController(BaseController): 9class StatsController(BaseController): 
    10 10 
    11    def index(self): 11    def index(self): 
    12        return render('stats/index') 12        return render('stats/index') 
    13      13  
       14     def text_index(self): 
       15         # only get those texts with stats 
       16         c.texts = model.Material.query.all() 
       17         import shakespeare.controllers.text 
       18         ctrl = shakespeare.controllers.text.TextController() 
       19         return ctrl.index() 
       20  
    14    def text(self, id): 21    def text(self, id): 
    15        text_name = id 22        text_name = id 
    16        text = model.Material.byName(text_name) 23        text = model.Material.byName(text_name) 
      24        # no id or no text by that id 
      25        if not text: 
      26            return self.text_index() 
    17        stats = shakespeare.stats.Stats() 27        stats = shakespeare.stats.Stats() 
    18        c.text = text 28        c.text = text 
    19        c.stats = stats.text_stats(text) 29        c.stats = stats.text_stats(text) 
    20        # 40 seems limit for google 30        # 40 seems limit for google 
    21        data = [ (s.word, s.freq) for s in c.stats[:40] ] 31        data = [ (s.word, s.freq) for s in c.stats[:40] ] 
    22        c.img_url = self.vertical_bar_chart(data) 32        c.img_url = self.vertical_bar_chart(data) 
    23        return render('stats/text') 33        return render('stats/text') 
    24 34 
      35    def word_index(self): 
      36        return '' 
      37     
      38    def word(self, id): 
      39        if id is None: 
      40            return self.word_index() 
      41        word = id 
      42        c.word = word 
      43        stats = shakespeare.stats.Stats() 
      44        c.stats = stats.word_stats(word) 
      45        # will not have that many texts so do not need to limit c.stats 
      46        data = [ (s.text.title, s.freq) for s in c.stats ] 
      47        c.img_url = self.vertical_bar_chart(data) 
      48        return render('stats/word') 
      49 
    25    # TODO: factor this out to its module (?) 50    # TODO: factor this out to its module (?) 
    26    def vertical_bar_chart(self, data, width=500): 51    def vertical_bar_chart(self, data, width=500): 
      52        if not data: 
      53            return '' 
    27        # tranpose 54        # tranpose 
    28        tdata = zip(*data) 55        tdata = zip(*data) 
    29        labels = list(tdata[0]) 56        labels = list(tdata[0]) 
    30        values = tdata[1] 57        values = tdata[1] 
    31        bar_width = 10 58        bar_width = 10 
    32        # add 5 for space between bars 59        # add 5 for space between bars 
    33        height = (bar_width + 5) * len(values) 60        height = (bar_width + 5) * len(values) 
    34        # was setting x_range but automatic behaviour seems better 61        # was setting x_range but automatic behaviour seems better 
    35        # x_range = (min(values), max(values)) 62        # x_range = (min(values), max(values)) 
    36        chart = pygooglechart.StackedHorizontalBarChart(width, height) 63        chart = pygooglechart.StackedHorizontalBarChart(width, height) 
    37        chart.set_bar_width(bar_width) 64        chart.set_bar_width(bar_width) 
    38        chart.set_colours(['cc0033']) 65        chart.set_colours(['cc0033']) 
    39        chart.add_data(values) 66        chart.add_data(values) 
    40        # have to reverse the labels for vertical 67        # have to reverse the labels for vertical 
    41        labels.reverse() 68        labels.reverse() 
    42        chart.set_axis_labels(pygooglechart.Axis.LEFT, labels) 69        chart.set_axis_labels(pygooglechart.Axis.LEFT, labels) 
    43        chart.set_axis_range(pygooglechart.Axis.BOTTOM, 0, max(values)) 70        chart.set_axis_range(pygooglechart.Axis.BOTTOM, 0, max(values)) 
    44        chart.set_axis_range(pygooglechart.Axis.TOP, 0, max(values)) 71        chart.set_axis_range(pygooglechart.Axis.TOP, 0, max(values)) 
    45        url = chart.get_url() 72        url = chart.get_url() 
    46        return url 73        return url 
    47 74 
  • trunk/shakespeare/stats.py

    Revision 192 Revision 199
    1""" 1""" 
    2Statistics for texts. 2Statistics for texts. 
    3 3 
    4All word keys are lower-cased in order to render them case-insensitive and 4All word keys are lower-cased in order to render them case-insensitive and 
    5are stemmed using the Xapian standard English stemmer. 5are stemmed using the Xapian standard English stemmer. 
    6 6 
    7TODO 7TODO 
    8==== 8==== 
    9 9 
    101. Provide for normalized statistics (that is occurences normalized by their 101. Provide for normalized statistics (that is occurences normalized by their 
    11occurence in the particular text). 11occurence in the particular text). 
    12 12 
    132. Support for aggregate statistics across multiple texts 132. Support for aggregate statistics across multiple texts 
    14""" 14""" 
    15import re 15import re 
    16import xapian 16import xapian 
    17 17 
    18import shakespeare.model as model 18import shakespeare.model as model 
    19 19 
    20class Stats(object): 20class Stats(object): 
    21 21 
    22    @classmethod 22    @classmethod 
    23    def analyze(self, fileobj): 23    def analyze(self, fileobj): 
    24        '''Get statistics on text in fileobj. 24        '''Get statistics on text in fileobj. 
    25 25 
    26        Words are stemmed so that e.g. love and loved count as the same word. 26        Words are stemmed so that e.g. love and loved count as the same word. 
    27        ''' 27        ''' 
    28        # (?) maybe could use xapian.TermGenerator to split document 28        # (?) maybe could use xapian.TermGenerator to split document 
    29        WORD_RE = re.compile('\\w{1,32}', re.U) 29        WORD_RE = re.compile('\\w{1,32}', re.U) 
    30        stemmer = xapian.Stem('english') 30        stemmer = xapian.Stem('english') 
    31        results = {} 31        results = {} 
    32        text = fileobj.read() 32        text = fileobj.read() 
    33        text = text.encode('utf8') 33        text = text.encode('utf8') 
    34        for term in WORD_RE.finditer(text): 34        for term in WORD_RE.finditer(text): 
    35            word = term.group() 35            word = term.group() 
    36            word = word.lower() 36            word = word.lower() 
    37            stemmed_word = stemmer(word) 37            stemmed_word = stemmer(word) 
    38            results[stemmed_word] = results.get(stemmed_word, 0) + 1 38            results[stemmed_word] = results.get(stemmed_word, 0) + 1 
    39        return results 39        return results 
    40 40 
    41    def statsify(self, material, fileobj): 41    def statsify(self, material, fileobj): 
    42        '''Create statistics associated to domain object `material` whose 42        '''Create statistics associated to domain object `material` whose 
    43        content is in `fileobj`. 43        content is in `fileobj`. 
    44        ''' 44        ''' 
    45        stats = self.analyze(fileobj) 45        stats = self.analyze(fileobj) 
    46        for k in stats: 46        for k in stats: 
    47            model.Statistic(text=material, 47            model.Statistic(text=material, 
    48                    word=k, 48                    word=k, 
    49                    freq=stats[k] 49                    freq=stats[k] 
    50                    ) 50                    ) 
    51        model.Session.flush() 51        model.Session.flush() 
    52 52 
    53    def freq(self, text, word): 53    def freq(self, text, word): 
    54        stat = model.Statistic.query.filter_by( 54        stat = model.Statistic.query.filter_by( 
    55                text=text).filter_by(word=word).first() 55                text=text).filter_by(word=word).first() 
    56        if stat: 56        if stat: 
    57            return stat.freq 57            return stat.freq 
    58        else: 58        else: 
    59            return 0 59            return 0 
    60 60 
    61    def text_stats(self, text): 61    def text_stats(self, text): 
    62        '''Return word statistics for text, most popular word first.'''  62         '''Statistics for text, most popular word first.''' 
    63        stats = model.Statistic.query.order_by(model.Statistic.freq.desc()).all()  63         stats = model.Statistic.query.filter_by(text=text).order_by( 
       64                 model.Statistic.freq.desc() 
       65                 ).all() 
    64        return stats 66        return stats 
    65 67 
      68    def word_stats(self, word): 
      69        '''Statistics for word (i.e. which texts use it) in order or 
      70        usage.''' 
      71        stats = model.Statistic.query.filter_by(word=word).order_by( 
      72                model.Statistic.freq.desc() 
      73                ).all() 
      74        return stats 
      75 
  • trunk/shakespeare/templates/stats/index.html

    Revision 187 Revision 199
    1<html xmlns:py="http://genshi.edgewall.org/" 1<html xmlns:py="http://genshi.edgewall.org/" 
    2  xmlns:xi="http://www.w3.org/2001/XInclude"> 2  xmlns:xi="http://www.w3.org/2001/XInclude"> 
    3   3   
    4  <py:def function="page_title">Stats</py:def> 4  <py:def function="page_title">Stats</py:def> 
    5 5 
    6  <div py:match="content"> 6  <div py:match="content"> 
    7    <p>This section provides statistical information about word occurences in 7    <p>This section provides statistical information about the various textual 
    8    the various textual materials available on the site. If you know the name 8    materials available on the site. 
    9    of a text you can get information by visiting ./text_name/   
    10    </p> 9    </p> 
      10    <p> 
      11      Currently information is provided organized by: 
      12    </p> 
      13    <ul> 
      14      <li> 
      15        <a href="${h.url_for(controller='stats', action='text', 
      16          id=None)}">Text</a> 
      17      </li> 
      18      <li> 
      19        <a href="${h.url_for(controller='stats', action='word', 
      20          id=None)}">Word</a> 
      21      </li> 
      22    </ul> 
    11  </div> 23  </div> 
    12 24 
    13  <xi:include href="../layout.html" /> 25  <xi:include href="../layout.html" /> 
    14</html> 26</html> 
  • trunk/shakespeare/templates/stats/text.html

    Revision 193 Revision 199
    1<html xmlns:py="http://genshi.edgewall.org/" 1<html xmlns:py="http://genshi.edgewall.org/" 
    2  xmlns:xi="http://www.w3.org/2001/XInclude"> 2  xmlns:xi="http://www.w3.org/2001/XInclude"> 
    3   3   
    4  <py:def function="page_title">Stats for ${c.text.title}</py:def> 4  <py:def function="page_title">Stats for ${c.text.title}</py:def> 
    5 5 
    6  <div py:match="content"> 6  <div py:match="content"> 
    7    <img style="float: left;" 7    <img style="float: left;" 
    8      src="${c.img_url}" 8      src="${c.img_url}" 
    9      alt="Word Statistics Bar Chart" /> 9      alt="Word Statistics Bar Chart" /> 
    10 10 
    11    <table border="1" style="margin-left: 550px;">  11     <p py:if="not c.stats"> 
       12       Sorry, no statistics are available for ${c.text.title} (name: ${c.text.name}) 
       13     </p> 
       14     <table style="margin-left: 550px;"> 
    12    <thead> 15    <thead> 
    13      <tr> 16      <tr> 
    14        <th>Index</th> 17        <th>Index</th> 
    15        <th> 18        <th> 
    16          Word   19          Word   
    17        </th> 20        </th> 
    18        <th> 21        <th> 
    19          Frequency 22          Frequency 
    20        </th> 23        </th> 
    21      </tr> 24      </tr> 
    22    </thead> 25    </thead> 
    23    <tbody> 26    <tbody> 
    24      <tr py:for="index, stat in enumerate(c.stats)"> 27      <tr py:for="index, stat in enumerate(c.stats)"> 
    25        <td> 28        <td> 
    26          ${index + 1} 29          ${index + 1} 
    27        </td> 30        </td> 
    28        <td> 31        <td> 
    29          ${stat.word} 32          ${stat.word} 
    30        </td> 33        </td> 
    31        <td> 34        <td> 
    32          ${stat.freq} 35          ${stat.freq} 
    33        </td> 36        </td> 
    34      </tr> 37      </tr> 
    35    </tbody> 38    </tbody> 
    36    </table> 39    </table> 
    37  </div> 40  </div> 
    38 41 
    39  <xi:include href="../layout.html" /> 42  <xi:include href="../layout.html" /> 
    40</html> 43</html> 
  • trunk/shakespeare/templates/stats/word.html

    Revision 193 Revision 199
    1<html xmlns:py="http://genshi.edgewall.org/" 1<html xmlns:py="http://genshi.edgewall.org/" 
    2  xmlns:xi="http://www.w3.org/2001/XInclude"> 2  xmlns:xi="http://www.w3.org/2001/XInclude"> 
    3   3   
    4  <py:def function="page_title">Stats for ${c.text.title}</py:def> 4  <py:def function="page_title">Stats for '${c.word}'</py:def> 
    5 5 
    6  <div py:match="content"> 6  <div py:match="content"> 
    7    <img style="float: left;" 7    <img style="float: left;" 
    8      src="${c.img_url}" 8      src="${c.img_url}" 
    9      alt="Word Statistics Bar Chart" /> 9      alt="Word Statistics Bar Chart" /> 
    10 10 
    11    <table border="1" style="margin-left: 550px;">  11     <p py:if="not c.stats"> 
       12       Sorry, no statistics are available for ${c.text.title} (name: ${c.text.name}) 
       13     </p> 
       14     <table style="margin-left: 550px;"> 
    12    <thead> 15    <thead> 
    13      <tr> 16      <tr> 
    14        <th>Index</th> 17        <th>Index</th> 
    15        <th> 18        <th> 
    16          Word   19          Text 
    17        </th> 20        </th> 
    18        <th> 21        <th> 
    19          Frequency 22          Frequency 
    20        </th> 23        </th> 
    21      </tr> 24      </tr> 
    22    </thead> 25    </thead> 
    23    <tbody> 26    <tbody> 
    24      <tr py:for="index, stat in enumerate(c.stats)"> 27      <tr py:for="index, stat in enumerate(c.stats)"> 
    25        <td> 28        <td> 
    26          ${index + 1} 29          ${index + 1} 
    27        </td> 30        </td> 
    28        <td> 31        <td> 
    29          ${stat.word} 32          ${stat.text.title} 
    30        </td> 33        </td> 
    31        <td> 34        <td> 
    32          ${stat.freq} 35          ${stat.freq} 
    33        </td> 36        </td> 
    34      </tr> 37      </tr> 
    35    </tbody> 38    </tbody> 
    36    </table> 39    </table> 
    37  </div> 40  </div> 
    38 41 
    39  <xi:include href="../layout.html" /> 42  <xi:include href="../layout.html" /> 
    40</html> 43</html> 
  • trunk/shakespeare/tests/__init__.py

    Revision 187 Revision 199
    1"""Pylons application test package 1"""Pylons application test package 
    2 2 
    3When the test runner finds and executes tests within this directory, 3When the test runner finds and executes tests within this directory, 
    4this file will be loaded to setup the test environment. 4this file will be loaded to setup the test environment. 
    5 5 
    6It registers the root directory of the project in sys.path and 6It registers the root directory of the project in sys.path and 
    7pkg_resources, in case the project hasn't been installed with 7pkg_resources, in case the project hasn't been installed with 
    8setuptools. It also initializes the application via websetup (paster 8setuptools. It also initializes the application via websetup (paster 
    9setup-app) with the project's test.ini configuration file. 9setup-app) with the project's test.ini configuration file. 
    10""" 10""" 
    11import os 11import os 
    12import sys 12import sys 
    13 13 
    14import pkg_resources 14import pkg_resources 
    15import paste.fixture 15import paste.fixture 
    16import paste.script.appinstall 16import paste.script.appinstall 
    17from paste.deploy import loadapp 17from paste.deploy import loadapp 
    18from routes import url_for 18from routes import url_for 
    19 19 
    20__all__ = ['url_for', 'TestController', 'make_fixture'] 20__all__ = ['url_for', 'TestController', 'make_fixture', 'make_fixture2'] 
    21 21 
    22here_dir = os.path.dirname(os.path.abspath(__file__)) 22here_dir = os.path.dirname(os.path.abspath(__file__)) 
    23conf_dir = os.path.dirname(os.path.dirname(here_dir)) 23conf_dir = os.path.dirname(os.path.dirname(here_dir)) 
    24 24 
    25sys.path.insert(0, conf_dir) 25sys.path.insert(0, conf_dir) 
    26pkg_resources.working_set.add_entry(conf_dir) 26pkg_resources.working_set.add_entry(conf_dir) 
    27pkg_resources.require('Paste') 27pkg_resources.require('Paste') 
    28pkg_resources.require('PasteScript') 28pkg_resources.require('PasteScript') 
    29 29 
    30test_file = os.path.join(conf_dir, 'test.ini') 30test_file = os.path.join(conf_dir, 'test.ini') 
    31cmd = paste.script.appinstall.SetupCommand('setup-app') 31cmd = paste.script.appinstall.SetupCommand('setup-app') 
    32cmd.run([test_file]) 32cmd.run([test_file]) 
    33 33 
    34sonnet18_text = \ 34sonnet18_text = \ 
    35'''Shall I compare thee to a summer's day? 35'''Shall I compare thee to a summer's day? 
    36Thou art more lovely and more temperate: 36Thou art more lovely and more temperate: 
    37Rough winds do shake the darling buds of May, 37Rough winds do shake the darling buds of May, 
    38And summer's lease hath all too short a date: 38And summer's lease hath all too short a date: 
    39 39 
    40Sometime too hot the eye of heaven shines, 40Sometime too hot the eye of heaven shines, 
    41And often is his gold complexion dimm'd, 41And often is his gold complexion dimm'd, 
    42And every fair from fair sometime declines, 42And every fair from fair sometime declines, 
    43By chance, or nature's changing course untrimm'd:  43By chance, or nature's changing course untrimm'd:  
    44 44 
    45But thy eternal summer shall not fade, 45But thy eternal summer shall not fade, 
    46Nor lose possession of that fair thou ow'st, 46Nor lose possession of that fair thou ow'st, 
    47Nor shall death brag thou wander'st in his shade, 47Nor shall death brag thou wander'st in his shade, 
    48When in eternal lines to time thou grow'st, 48When in eternal lines to time thou grow'st, 
    49 49 
    50  So long as men can breathe, or eyes can see, 50  So long as men can breathe, or eyes can see, 
    51  So long lives this, and this gives life to thee. 51  So long lives this, and this gives life to thee. 
    52''' 52''' 
    53 53 
    54# must use make_fixture rather than just create object as we need to be in 54# must use make_fixture rather than just create object as we need to be in 
    55# current db session 55# current db session 
    56def make_fixture(): 56def make_fixture(): 
    57    import shakespeare.model as model 57    import shakespeare.model as model 
    58    sonnet18_name = 'test_sonnet18' 58    sonnet18_name = 'test_sonnet18' 
    59    sonnet18 = model.Material.byName(sonnet18_name) 59    sonnet18 = model.Material.byName(sonnet18_name) 
    60    if not sonnet18: 60    if not sonnet18: 
    61        sonnet18 = model.Material(name=sonnet18_name, 61        sonnet18 = model.Material(name=sonnet18_name, 
    62                title='Sonnet 18', 62                title='Sonnet 18', 
    63                ) 63                ) 
    64        model.Session.flush() 64        model.Session.flush() 
    65    sonnet18.content = sonnet18_text 65    sonnet18.content = sonnet18_text 
    66    return sonnet18 66    return sonnet18 
    67 67 
      68def make_fixture2(): 
      69    import shakespeare.model as model 
      70    sonnet18_name = 'test_sonnet18_2' 
      71    sonnet18 = model.Material.byName(sonnet18_name) 
      72    if not sonnet18: 
      73        sonnet18 = model.Material(name=sonnet18_name, 
      74                title='Sonnet 18 Duplicate', 
      75                ) 
      76        model.Session.flush() 
      77    sonnet18.content = sonnet18_text 
      78    return sonnet18 
    68 79 
    69class TestController(object): 80class TestController(object): 
    70 81 
    71    def __init__(self, *args, **kwargs): 82    def __init__(self, *args, **kwargs): 
    72        wsgiapp = loadapp('config:test.ini', relative_to=conf_dir) 83        wsgiapp = loadapp('config:test.ini', relative_to=conf_dir) 
    73        self.app = paste.fixture.TestApp(wsgiapp) 84        self.app = paste.fixture.TestApp(wsgiapp) 
  • trunk/shakespeare/tests/functional/test_stats.py

    Revision 187 Revision 199
    1from shakespeare.tests import * 1from shakespeare.tests import * 
    2 2 
    3import shakespeare.model as model 3import shakespeare.model as model 
    4import shakespeare.tests.test_stats 4import shakespeare.tests.test_stats 
    5 5 
    6 6 
    7class TestSearchController(TestController): 7class TestSearchController(TestController): 
    8 8 
    9    text = make_fixture() 9    text = make_fixture() 
      10    text2 = make_fixture2() 
    10 11 
    11    def setUp(self): 12    def setUp(self): 
    12        model.Session.begin() 13        model.Session.begin() 
    13        shakespeare.tests.test_stats.stats_fixture(self.text) 14        shakespeare.tests.test_stats.stats_fixture(self.text) 
    14 15 
    15    def tearDown(self): 16    def tearDown(self): 
    16        model.Session.rollback() 17        model.Session.rollback() 
    17        model.Session.remove() 18        model.Session.remove() 
    18 19 
    19    def test_index(self): 20    def test_index(self): 
    20        url = url_for(controller='stats') 21        url = url_for(controller='stats') 
    21        res = self.app.get(url) 22        res = self.app.get(url) 
    22        assert 'Stats' in res 23        assert 'Stats' in res 
      24 
      25    def test_text_stats_index(self): 
      26        url = url_for(controller='stats', action='text', id=None) 
      27        res = self.app.get(url) 
      28        assert self.text.name in res 
      29        assert self.text2.name in res 
    23     30     
    24    def test_stats(self): 31    def test_text_stats(self): 
    25        text = make_fixture()   
    26        url = url_for(controller='stats', action='text', id=self.text.name) 32        url = url_for(controller='stats', action='text', id=self.text.name) 
    27        res = self.app.get(url) 33        res = self.app.get(url) 
    28        assert 'summer' in res 34        assert 'summer' in res 
    29 35 
      36    # TODO: stats for a text with no associated items  
      37    def test_text_no_stats(self): 
      38        url = url_for(controller='stats', action='text', id=self.text2.name) 
      39        res = self.app.get(url) 
      40        assert 'Sorry, no statistics' in res 
      41 
      42    def test_word_stats(self): 
      43        shakespeare.tests.test_stats.stats_fixture(self.text2) 
      44        word = 'summer' 
      45        url = url_for(controller='stats', action='word', id=word) 
      46        res = self.app.get(url) 
      47        assert 'summer' in res 
      48        assert self.text.title in res 
      49        assert self.text2.title in res 
      50        assert '3' in res 
      51 
  • trunk/shakespeare/tests/test_stats.py

    Revision 187 Revision 199
    1import os 1import os 
    2import shutil 2import shutil 
    3import tempfile 3import tempfile 
    4import StringIO 4import StringIO 
    5 5 
    6import shakespeare.stats 6import shakespeare.stats 
    7import shakespeare.model as model 7import shakespeare.model as model 
    8from shakespeare.tests import * 8from shakespeare.tests import * 
    9 9 
    10def stats_fixture(text): 10def stats_fixture(text): 
    11    stats = shakespeare.stats.Stats() 11    stats = shakespeare.stats.Stats() 
    12    fileobj = StringIO.StringIO(text.content) 12    fileobj = StringIO.StringIO(text.content) 
    13    stats.statsify(text, fileobj) 13    stats.statsify(text, fileobj) 
    14 14 
    15class TestStats: 15class TestStats: 
    16 16 
    17    def setUp(self): 17    def setUp(self): 
    18        self.stats = shakespeare.stats.Stats() 18        self.stats = shakespeare.stats.Stats() 
    19        self.text = make_fixture() 19        self.text = make_fixture() 
      20        self.text2 = make_fixture2() 
    20        model.Session.begin() 21        model.Session.begin() 
    21 22 
    22    def tearDown(self): 23    def tearDown(self): 
    23        model.Session.rollback() 24        model.Session.rollback() 
    24        model.Session.remove() 25        model.Session.remove() 
    25 26 
    26    def test_get_stats(self): 27    def test_get_stats(self): 
    27        simpletext = 'Death death dead love loved loving' 28        simpletext = 'Death death dead love loved loving' 
    28        out = self.stats.analyze(StringIO.StringIO(simpletext)) 29        out = self.stats.analyze(StringIO.StringIO(simpletext)) 
    29        assert len(out) == 3 30        assert len(out) == 3 
    30        assert out['love'] == 3 31        assert out['love'] == 3 
    31        assert out['death'] == 2 32        assert out['death'] == 2 
    32        assert out['dead'] == 1 33        assert out['dead'] == 1 
    33 34 
    34    def test_freq_nonexistent(self): 35    def test_freq_nonexistent(self): 
    35        nonexistent_word = 'abdfakfjadf' 36        nonexistent_word = 'abdfakfjadf' 
    36        freq = self.stats.freq(self.text, nonexistent_word) 37        freq = self.stats.freq(self.text, nonexistent_word) 
    37        assert freq == 0 38        assert freq == 0 
    38     39     
    39    def test_statsify(self): 40    def test_statsify(self): 
    40        stats_fixture(self.text) 41        stats_fixture(self.text) 
    41        word = 'summer' 42        word = 'summer' 
    42        freq = self.stats.freq(self.text, word) 43        freq = self.stats.freq(self.text, word) 
    43        assert freq == 3 44        assert freq == 3 
    44 45 
    45    def test_text_stats(self): 46    def test_text_stats(self): 
      47        # create stats for at least 2 texts to make sure we only pick up one 
    46        stats_fixture(self.text) 48        stats_fixture(self.text) 
      49        stats_fixture(self.text2) 
    47 50 
    48        stats = self.stats.text_stats(self.text) 51        stats = self.stats.text_stats(self.text) 
    49        for s in stats: 52        for s in stats: 
    50            print s.word, s.freq 53            print s.word, s.freq 
    51            if s.word == 'summer': break 54            if s.word == 'summer': break 
    52        assert stats[0].word == 'and' 55        assert stats[0].word == 'and' 
    53        assert stats[0].freq == 5 56        assert stats[0].freq == 5 
    54        assert stats[2].word == 'summer' 57        assert stats[2].word == 'summer' 
    55        assert stats[2].freq == 3 58        assert stats[2].freq == 3 
      59     
      60    def test_word_stats(self): 
      61        stats_fixture(self.text) 
      62        stats_fixture(self.text2) 
      63        stats = self.stats.word_stats('summer') 
      64        assert len(stats) == 2 
      65        assert stats[0].text.name == self.text.name 
      66        assert stats[0].freq == 3 
      67        # same text so should be the same! 
      68        assert stats[0].freq == stats[1].freq 
    56 69 
    57 70 
    58