Changeset 199
- Timestamp:
- 08/23/08 17:27:40 (3 months ago)
- Files:
-
- trunk/shakespeare/controllers/stats.py (modified) (1 diff)
- trunk/shakespeare/stats.py (modified) (1 diff)
- trunk/shakespeare/templates/stats/index.html (modified) (1 diff)
- trunk/shakespeare/templates/stats/text.html (modified) (1 diff)
- trunk/shakespeare/templates/stats/word.html (copied) (copied from trunk/shakespeare/templates/stats/text.html) (1 diff)
- trunk/shakespeare/tests/__init__.py (modified) (1 diff)
- trunk/shakespeare/tests/functional/test_stats.py (modified) (1 diff)
- trunk/shakespeare/tests/test_stats.py (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/shakespeare/controllers/stats.py
Revision 193 Revision 199 1 import logging 1 import logging 2 2 3 import pygooglechart 3 import pygooglechart 4 4 5 from shakespeare.lib.base import * 5 from shakespeare.lib.base import * 6 log = logging.getLogger(__name__) 6 log = logging.getLogger(__name__) 7 import shakespeare.stats 7 import shakespeare.stats 8 8 9 class StatsController(BaseController): 9 class StatsController(BaseController): 10 10 11 def index(self): 11 def index(self): 12 return render('stats/index') 12 return render('stats/index') 13 13 14 def text_index(self): 15 # only get those texts with stats 16 c.texts = model.Material.query.all() 17 import shakespeare.controllers.text 18 ctrl = shakespeare.controllers.text.TextController() 19 return ctrl.index() 20 14 def text(self, id): 21 def text(self, id): 15 text_name = id 22 text_name = id 16 text = model.Material.byName(text_name) 23 text = model.Material.byName(text_name) 24 # no id or no text by that id 25 if not text: 26 return self.text_index() 17 stats = shakespeare.stats.Stats() 27 stats = shakespeare.stats.Stats() 18 c.text = text 28 c.text = text 19 c.stats = stats.text_stats(text) 29 c.stats = stats.text_stats(text) 20 # 40 seems limit for google 30 # 40 seems limit for google 21 data = [ (s.word, s.freq) for s in c.stats[:40] ] 31 data = [ (s.word, s.freq) for s in c.stats[:40] ] 22 c.img_url = self.vertical_bar_chart(data) 32 c.img_url = self.vertical_bar_chart(data) 23 return render('stats/text') 33 return render('stats/text') 24 34 35 def word_index(self): 36 return '' 37 38 def word(self, id): 39 if id is None: 40 return self.word_index() 41 word = id 42 c.word = word 43 stats = shakespeare.stats.Stats() 44 c.stats = stats.word_stats(word) 45 # will not have that many texts so do not need to limit c.stats 46 data = [ (s.text.title, s.freq) for s in c.stats ] 47 c.img_url = self.vertical_bar_chart(data) 48 return render('stats/word') 49 25 # TODO: factor this out to its module (?) 50 # TODO: factor this out to its module (?) 26 def vertical_bar_chart(self, data, width=500): 51 def vertical_bar_chart(self, data, width=500): 52 if not data: 53 return '' 27 # tranpose 54 # tranpose 28 tdata = zip(*data) 55 tdata = zip(*data) 29 labels = list(tdata[0]) 56 labels = list(tdata[0]) 30 values = tdata[1] 57 values = tdata[1] 31 bar_width = 10 58 bar_width = 10 32 # add 5 for space between bars 59 # add 5 for space between bars 33 height = (bar_width + 5) * len(values) 60 height = (bar_width + 5) * len(values) 34 # was setting x_range but automatic behaviour seems better 61 # was setting x_range but automatic behaviour seems better 35 # x_range = (min(values), max(values)) 62 # x_range = (min(values), max(values)) 36 chart = pygooglechart.StackedHorizontalBarChart(width, height) 63 chart = pygooglechart.StackedHorizontalBarChart(width, height) 37 chart.set_bar_width(bar_width) 64 chart.set_bar_width(bar_width) 38 chart.set_colours(['cc0033']) 65 chart.set_colours(['cc0033']) 39 chart.add_data(values) 66 chart.add_data(values) 40 # have to reverse the labels for vertical 67 # have to reverse the labels for vertical 41 labels.reverse() 68 labels.reverse() 42 chart.set_axis_labels(pygooglechart.Axis.LEFT, labels) 69 chart.set_axis_labels(pygooglechart.Axis.LEFT, labels) 43 chart.set_axis_range(pygooglechart.Axis.BOTTOM, 0, max(values)) 70 chart.set_axis_range(pygooglechart.Axis.BOTTOM, 0, max(values)) 44 chart.set_axis_range(pygooglechart.Axis.TOP, 0, max(values)) 71 chart.set_axis_range(pygooglechart.Axis.TOP, 0, max(values)) 45 url = chart.get_url() 72 url = chart.get_url() 46 return url 73 return url 47 74 trunk/shakespeare/stats.py
Revision 192 Revision 199 1 """ 1 """ 2 Statistics for texts. 2 Statistics for texts. 3 3 4 All word keys are lower-cased in order to render them case-insensitive and 4 All word keys are lower-cased in order to render them case-insensitive and 5 are stemmed using the Xapian standard English stemmer. 5 are stemmed using the Xapian standard English stemmer. 6 6 7 TODO 7 TODO 8 ==== 8 ==== 9 9 10 1. Provide for normalized statistics (that is occurences normalized by their 10 1. Provide for normalized statistics (that is occurences normalized by their 11 occurence in the particular text). 11 occurence in the particular text). 12 12 13 2. Support for aggregate statistics across multiple texts 13 2. Support for aggregate statistics across multiple texts 14 """ 14 """ 15 import re 15 import re 16 import xapian 16 import xapian 17 17 18 import shakespeare.model as model 18 import shakespeare.model as model 19 19 20 class Stats(object): 20 class Stats(object): 21 21 22 @classmethod 22 @classmethod 23 def analyze(self, fileobj): 23 def analyze(self, fileobj): 24 '''Get statistics on text in fileobj. 24 '''Get statistics on text in fileobj. 25 25 26 Words are stemmed so that e.g. love and loved count as the same word. 26 Words are stemmed so that e.g. love and loved count as the same word. 27 ''' 27 ''' 28 # (?) maybe could use xapian.TermGenerator to split document 28 # (?) maybe could use xapian.TermGenerator to split document 29 WORD_RE = re.compile('\\w{1,32}', re.U) 29 WORD_RE = re.compile('\\w{1,32}', re.U) 30 stemmer = xapian.Stem('english') 30 stemmer = xapian.Stem('english') 31 results = {} 31 results = {} 32 text = fileobj.read() 32 text = fileobj.read() 33 text = text.encode('utf8') 33 text = text.encode('utf8') 34 for term in WORD_RE.finditer(text): 34 for term in WORD_RE.finditer(text): 35 word = term.group() 35 word = term.group() 36 word = word.lower() 36 word = word.lower() 37 stemmed_word = stemmer(word) 37 stemmed_word = stemmer(word) 38 results[stemmed_word] = results.get(stemmed_word, 0) + 1 38 results[stemmed_word] = results.get(stemmed_word, 0) + 1 39 return results 39 return results 40 40 41 def statsify(self, material, fileobj): 41 def statsify(self, material, fileobj): 42 '''Create statistics associated to domain object `material` whose 42 '''Create statistics associated to domain object `material` whose 43 content is in `fileobj`. 43 content is in `fileobj`. 44 ''' 44 ''' 45 stats = self.analyze(fileobj) 45 stats = self.analyze(fileobj) 46 for k in stats: 46 for k in stats: 47 model.Statistic(text=material, 47 model.Statistic(text=material, 48 word=k, 48 word=k, 49 freq=stats[k] 49 freq=stats[k] 50 ) 50 ) 51 model.Session.flush() 51 model.Session.flush() 52 52 53 def freq(self, text, word): 53 def freq(self, text, word): 54 stat = model.Statistic.query.filter_by( 54 stat = model.Statistic.query.filter_by( 55 text=text).filter_by(word=word).first() 55 text=text).filter_by(word=word).first() 56 if stat: 56 if stat: 57 return stat.freq 57 return stat.freq 58 else: 58 else: 59 return 0 59 return 0 60 60 61 def text_stats(self, text): 61 def text_stats(self, text): 62 '''Return word statistics for text, most popular word first.''' 62 '''Statistics for text, most popular word first.''' 63 stats = model.Statistic.query.order_by(model.Statistic.freq.desc()).all() 63 stats = model.Statistic.query.filter_by(text=text).order_by( 64 model.Statistic.freq.desc() 65 ).all() 64 return stats 66 return stats 65 67 68 def word_stats(self, word): 69 '''Statistics for word (i.e. which texts use it) in order or 70 usage.''' 71 stats = model.Statistic.query.filter_by(word=word).order_by( 72 model.Statistic.freq.desc() 73 ).all() 74 return stats 75 trunk/shakespeare/templates/stats/index.html
Revision 187 Revision 199 1 <html xmlns:py="http://genshi.edgewall.org/" 1 <html xmlns:py="http://genshi.edgewall.org/" 2 xmlns:xi="http://www.w3.org/2001/XInclude"> 2 xmlns:xi="http://www.w3.org/2001/XInclude"> 3 3 4 <py:def function="page_title">Stats</py:def> 4 <py:def function="page_title">Stats</py:def> 5 5 6 <div py:match="content"> 6 <div py:match="content"> 7 <p>This section provides statistical information about word occurences in 7 <p>This section provides statistical information about the various textual 8 the various textual materials available on the site. If you know the name 8 materials available on the site. 9 of a text you can get information by visiting ./text_name/ 10 </p> 9 </p> 10 <p> 11 Currently information is provided organized by: 12 </p> 13 <ul> 14 <li> 15 <a href="${h.url_for(controller='stats', action='text', 16 id=None)}">Text</a> 17 </li> 18 <li> 19 <a href="${h.url_for(controller='stats', action='word', 20 id=None)}">Word</a> 21 </li> 22 </ul> 11 </div> 23 </div> 12 24 13 <xi:include href="../layout.html" /> 25 <xi:include href="../layout.html" /> 14 </html> 26 </html> trunk/shakespeare/templates/stats/text.html
Revision 193 Revision 199 1 <html xmlns:py="http://genshi.edgewall.org/" 1 <html xmlns:py="http://genshi.edgewall.org/" 2 xmlns:xi="http://www.w3.org/2001/XInclude"> 2 xmlns:xi="http://www.w3.org/2001/XInclude"> 3 3 4 <py:def function="page_title">Stats for ${c.text.title}</py:def> 4 <py:def function="page_title">Stats for ${c.text.title}</py:def> 5 5 6 <div py:match="content"> 6 <div py:match="content"> 7 <img style="float: left;" 7 <img style="float: left;" 8 src="${c.img_url}" 8 src="${c.img_url}" 9 alt="Word Statistics Bar Chart" /> 9 alt="Word Statistics Bar Chart" /> 10 10 11 <table border="1" style="margin-left: 550px;"> 11 <p py:if="not c.stats"> 12 Sorry, no statistics are available for ${c.text.title} (name: ${c.text.name}) 13 </p> 14 <table style="margin-left: 550px;"> 12 <thead> 15 <thead> 13 <tr> 16 <tr> 14 <th>Index</th> 17 <th>Index</th> 15 <th> 18 <th> 16 Word 19 Word 17 </th> 20 </th> 18 <th> 21 <th> 19 Frequency 22 Frequency 20 </th> 23 </th> 21 </tr> 24 </tr> 22 </thead> 25 </thead> 23 <tbody> 26 <tbody> 24 <tr py:for="index, stat in enumerate(c.stats)"> 27 <tr py:for="index, stat in enumerate(c.stats)"> 25 <td> 28 <td> 26 ${index + 1} 29 ${index + 1} 27 </td> 30 </td> 28 <td> 31 <td> 29 ${stat.word} 32 ${stat.word} 30 </td> 33 </td> 31 <td> 34 <td> 32 ${stat.freq} 35 ${stat.freq} 33 </td> 36 </td> 34 </tr> 37 </tr> 35 </tbody> 38 </tbody> 36 </table> 39 </table> 37 </div> 40 </div> 38 41 39 <xi:include href="../layout.html" /> 42 <xi:include href="../layout.html" /> 40 </html> 43 </html> trunk/shakespeare/templates/stats/word.html
Revision 193 Revision 199 1 <html xmlns:py="http://genshi.edgewall.org/" 1 <html xmlns:py="http://genshi.edgewall.org/" 2 xmlns:xi="http://www.w3.org/2001/XInclude"> 2 xmlns:xi="http://www.w3.org/2001/XInclude"> 3 3 4 <py:def function="page_title">Stats for ${c.text.title}</py:def>4 <py:def function="page_title">Stats for '${c.word}'</py:def> 5 5 6 <div py:match="content"> 6 <div py:match="content"> 7 <img style="float: left;" 7 <img style="float: left;" 8 src="${c.img_url}" 8 src="${c.img_url}" 9 alt="Word Statistics Bar Chart" /> 9 alt="Word Statistics Bar Chart" /> 10 10 11 <table border="1" style="margin-left: 550px;"> 11 <p py:if="not c.stats"> 12 Sorry, no statistics are available for ${c.text.title} (name: ${c.text.name}) 13 </p> 14 <table style="margin-left: 550px;"> 12 <thead> 15 <thead> 13 <tr> 16 <tr> 14 <th>Index</th> 17 <th>Index</th> 15 <th> 18 <th> 16 Word19 Text 17 </th> 20 </th> 18 <th> 21 <th> 19 Frequency 22 Frequency 20 </th> 23 </th> 21 </tr> 24 </tr> 22 </thead> 25 </thead> 23 <tbody> 26 <tbody> 24 <tr py:for="index, stat in enumerate(c.stats)"> 27 <tr py:for="index, stat in enumerate(c.stats)"> 25 <td> 28 <td> 26 ${index + 1} 29 ${index + 1} 27 </td> 30 </td> 28 <td> 31 <td> 29 ${stat. word}32 ${stat.text.title} 30 </td> 33 </td> 31 <td> 34 <td> 32 ${stat.freq} 35 ${stat.freq} 33 </td> 36 </td> 34 </tr> 37 </tr> 35 </tbody> 38 </tbody> 36 </table> 39 </table> 37 </div> 40 </div> 38 41 39 <xi:include href="../layout.html" /> 42 <xi:include href="../layout.html" /> 40 </html> 43 </html> trunk/shakespeare/tests/__init__.py
Revision 187 Revision 199 1 """Pylons application test package 1 """Pylons application test package 2 2 3 When the test runner finds and executes tests within this directory, 3 When the test runner finds and executes tests within this directory, 4 this file will be loaded to setup the test environment. 4 this file will be loaded to setup the test environment. 5 5 6 It registers the root directory of the project in sys.path and 6 It registers the root directory of the project in sys.path and 7 pkg_resources, in case the project hasn't been installed with 7 pkg_resources, in case the project hasn't been installed with 8 setuptools. It also initializes the application via websetup (paster 8 setuptools. It also initializes the application via websetup (paster 9 setup-app) with the project's test.ini configuration file. 9 setup-app) with the project's test.ini configuration file. 10 """ 10 """ 11 import os 11 import os 12 import sys 12 import sys 13 13 14 import pkg_resources 14 import pkg_resources 15 import paste.fixture 15 import paste.fixture 16 import paste.script.appinstall 16 import paste.script.appinstall 17 from paste.deploy import loadapp 17 from paste.deploy import loadapp 18 from routes import url_for 18 from routes import url_for 19 19 20 __all__ = ['url_for', 'TestController', 'make_fixture' ]20 __all__ = ['url_for', 'TestController', 'make_fixture', 'make_fixture2' ] 21 21 22 here_dir = os.path.dirname(os.path.abspath(__file__)) 22 here_dir = os.path.dirname(os.path.abspath(__file__)) 23 conf_dir = os.path.dirname(os.path.dirname(here_dir)) 23 conf_dir = os.path.dirname(os.path.dirname(here_dir)) 24 24 25 sys.path.insert(0, conf_dir) 25 sys.path.insert(0, conf_dir) 26 pkg_resources.working_set.add_entry(conf_dir) 26 pkg_resources.working_set.add_entry(conf_dir) 27 pkg_resources.require('Paste') 27 pkg_resources.require('Paste') 28 pkg_resources.require('PasteScript') 28 pkg_resources.require('PasteScript') 29 29 30 test_file = os.path.join(conf_dir, 'test.ini') 30 test_file = os.path.join(conf_dir, 'test.ini') 31 cmd = paste.script.appinstall.SetupCommand('setup-app') 31 cmd = paste.script.appinstall.SetupCommand('setup-app') 32 cmd.run([test_file]) 32 cmd.run([test_file]) 33 33 34 sonnet18_text = \ 34 sonnet18_text = \ 35 '''Shall I compare thee to a summer's day? 35 '''Shall I compare thee to a summer's day? 36 Thou art more lovely and more temperate: 36 Thou art more lovely and more temperate: 37 Rough winds do shake the darling buds of May, 37 Rough winds do shake the darling buds of May, 38 And summer's lease hath all too short a date: 38 And summer's lease hath all too short a date: 39 39 40 Sometime too hot the eye of heaven shines, 40 Sometime too hot the eye of heaven shines, 41 And often is his gold complexion dimm'd, 41 And often is his gold complexion dimm'd, 42 And every fair from fair sometime declines, 42 And every fair from fair sometime declines, 43 By chance, or nature's changing course untrimm'd: 43 By chance, or nature's changing course untrimm'd: 44 44 45 But thy eternal summer shall not fade, 45 But thy eternal summer shall not fade, 46 Nor lose possession of that fair thou ow'st, 46 Nor lose possession of that fair thou ow'st, 47 Nor shall death brag thou wander'st in his shade, 47 Nor shall death brag thou wander'st in his shade, 48 When in eternal lines to time thou grow'st, 48 When in eternal lines to time thou grow'st, 49 49 50 So long as men can breathe, or eyes can see, 50 So long as men can breathe, or eyes can see, 51 So long lives this, and this gives life to thee. 51 So long lives this, and this gives life to thee. 52 ''' 52 ''' 53 53 54 # must use make_fixture rather than just create object as we need to be in 54 # must use make_fixture rather than just create object as we need to be in 55 # current db session 55 # current db session 56 def make_fixture(): 56 def make_fixture(): 57 import shakespeare.model as model 57 import shakespeare.model as model 58 sonnet18_name = 'test_sonnet18' 58 sonnet18_name = 'test_sonnet18' 59 sonnet18 = model.Material.byName(sonnet18_name) 59 sonnet18 = model.Material.byName(sonnet18_name) 60 if not sonnet18: 60 if not sonnet18: 61 sonnet18 = model.Material(name=sonnet18_name, 61 sonnet18 = model.Material(name=sonnet18_name, 62 title='Sonnet 18', 62 title='Sonnet 18', 63 ) 63 ) 64 model.Session.flush() 64 model.Session.flush() 65 sonnet18.content = sonnet18_text 65 sonnet18.content = sonnet18_text 66 return sonnet18 66 return sonnet18 67 67 68 def make_fixture2(): 69 import shakespeare.model as model 70 sonnet18_name = 'test_sonnet18_2' 71 sonnet18 = model.Material.byName(sonnet18_name) 72 if not sonnet18: 73 sonnet18 = model.Material(name=sonnet18_name, 74 title='Sonnet 18 Duplicate', 75 ) 76 model.Session.flush() 77 sonnet18.content = sonnet18_text 78 return sonnet18 68 79 69 class TestController(object): 80 class TestController(object): 70 81 71 def __init__(self, *args, **kwargs): 82 def __init__(self, *args, **kwargs): 72 wsgiapp = loadapp('config:test.ini', relative_to=conf_dir) 83 wsgiapp = loadapp('config:test.ini', relative_to=conf_dir) 73 self.app = paste.fixture.TestApp(wsgiapp) 84 self.app = paste.fixture.TestApp(wsgiapp) trunk/shakespeare/tests/functional/test_stats.py
Revision 187 Revision 199 1 from shakespeare.tests import * 1 from shakespeare.tests import * 2 2 3 import shakespeare.model as model 3 import shakespeare.model as model 4 import shakespeare.tests.test_stats 4 import shakespeare.tests.test_stats 5 5 6 6 7 class TestSearchController(TestController): 7 class TestSearchController(TestController): 8 8 9 text = make_fixture() 9 text = make_fixture() 10 text2 = make_fixture2() 10 11 11 def setUp(self): 12 def setUp(self): 12 model.Session.begin() 13 model.Session.begin() 13 shakespeare.tests.test_stats.stats_fixture(self.text) 14 shakespeare.tests.test_stats.stats_fixture(self.text) 14 15 15 def tearDown(self): 16 def tearDown(self): 16 model.Session.rollback() 17 model.Session.rollback() 17 model.Session.remove() 18 model.Session.remove() 18 19 19 def test_index(self): 20 def test_index(self): 20 url = url_for(controller='stats') 21 url = url_for(controller='stats') 21 res = self.app.get(url) 22 res = self.app.get(url) 22 assert 'Stats' in res 23 assert 'Stats' in res 24 25 def test_text_stats_index(self): 26 url = url_for(controller='stats', action='text', id=None) 27 res = self.app.get(url) 28 assert self.text.name in res 29 assert self.text2.name in res 23 30 24 def test_stats(self): 31 def test_text_stats(self): 25 text = make_fixture() 26 url = url_for(controller='stats', action='text', id=self.text.name) 32 url = url_for(controller='stats', action='text', id=self.text.name) 27 res = self.app.get(url) 33 res = self.app.get(url) 28 assert 'summer' in res 34 assert 'summer' in res 29 35 36 # TODO: stats for a text with no associated items 37 def test_text_no_stats(self): 38 url = url_for(controller='stats', action='text', id=self.text2.name) 39 res = self.app.get(url) 40 assert 'Sorry, no statistics' in res 41 42 def test_word_stats(self): 43 shakespeare.tests.test_stats.stats_fixture(self.text2) 44 word = 'summer' 45 url = url_for(controller='stats', action='word', id=word) 46 res = self.app.get(url) 47 assert 'summer' in res 48 assert self.text.title in res 49 assert self.text2.title in res 50 assert '3' in res 51 trunk/shakespeare/tests/test_stats.py
Revision 187 Revision 199 1 import os 1 import os 2 import shutil 2 import shutil 3 import tempfile 3 import tempfile 4 import StringIO 4 import StringIO 5 5 6 import shakespeare.stats 6 import shakespeare.stats 7 import shakespeare.model as model 7 import shakespeare.model as model 8 from shakespeare.tests import * 8 from shakespeare.tests import * 9 9 10 def stats_fixture(text): 10 def stats_fixture(text): 11 stats = shakespeare.stats.Stats() 11 stats = shakespeare.stats.Stats() 12 fileobj = StringIO.StringIO(text.content) 12 fileobj = StringIO.StringIO(text.content) 13 stats.statsify(text, fileobj) 13 stats.statsify(text, fileobj) 14 14 15 class TestStats: 15 class TestStats: 16 16 17 def setUp(self): 17 def setUp(self): 18 self.stats = shakespeare.stats.Stats() 18 self.stats = shakespeare.stats.Stats() 19 self.text = make_fixture() 19 self.text = make_fixture() 20 self.text2 = make_fixture2() 20 model.Session.begin() 21 model.Session.begin() 21 22 22 def tearDown(self): 23 def tearDown(self): 23 model.Session.rollback() 24 model.Session.rollback() 24 model.Session.remove() 25 model.Session.remove() 25 26 26 def test_get_stats(self): 27 def test_get_stats(self): 27 simpletext = 'Death death dead love loved loving' 28 simpletext = 'Death death dead love loved loving' 28 out = self.stats.analyze(StringIO.StringIO(simpletext)) 29 out = self.stats.analyze(StringIO.StringIO(simpletext)) 29 assert len(out) == 3 30 assert len(out) == 3 30 assert out['love'] == 3 31 assert out['love'] == 3 31 assert out['death'] == 2 32 assert out['death'] == 2 32 assert out['dead'] == 1 33 assert out['dead'] == 1 33 34 34 def test_freq_nonexistent(self): 35 def test_freq_nonexistent(self): 35 nonexistent_word = 'abdfakfjadf' 36 nonexistent_word = 'abdfakfjadf' 36 freq = self.stats.freq(self.text, nonexistent_word) 37 freq = self.stats.freq(self.text, nonexistent_word) 37 assert freq == 0 38 assert freq == 0 38 39 39 def test_statsify(self): 40 def test_statsify(self): 40 stats_fixture(self.text) 41 stats_fixture(self.text) 41 word = 'summer' 42 word = 'summer' 42 freq = self.stats.freq(self.text, word) 43 freq = self.stats.freq(self.text, word) 43 assert freq == 3 44 assert freq == 3 44 45 45 def test_text_stats(self): 46 def test_text_stats(self): 47 # create stats for at least 2 texts to make sure we only pick up one 46 stats_fixture(self.text) 48 stats_fixture(self.text) 49 stats_fixture(self.text2) 47 50 48 stats = self.stats.text_stats(self.text) 51 stats = self.stats.text_stats(self.text) 49 for s in stats: 52 for s in stats: 50 print s.word, s.freq 53 print s.word, s.freq 51 if s.word == 'summer': break 54 if s.word == 'summer': break 52 assert stats[0].word == 'and' 55 assert stats[0].word == 'and' 53 assert stats[0].freq == 5 56 assert stats[0].freq == 5 54 assert stats[2].word == 'summer' 57 assert stats[2].word == 'summer' 55 assert stats[2].freq == 3 58 assert stats[2].freq == 3 59 60 def test_word_stats(self): 61 stats_fixture(self.text) 62 stats_fixture(self.text2) 63 stats = self.stats.word_stats('summer') 64 assert len(stats) == 2 65 assert stats[0].text.name == self.text.name 66 assert stats[0].freq == 3 67 # same text so should be the same! 68 assert stats[0].freq == stats[1].freq 56 69 57 70 58
