Changeset 191
- Timestamp: 08/16/08 23:51:24 (11 months ago)
- Location: trunk/shakespeare
- Files: 5 modified
-
controllers/search.py (modified) (2 diffs)
-
search.py (modified) (9 diffs)
-
templates/search/index.html (modified) (1 diff)
-
tests/functional/test_search.py (modified) (2 diffs)
-
tests/test_search.py (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/shakespeare/controllers/search.py
r170 r191 10 10 11 11 def index(self): 12 query = request.params.get('query', '') 13 if query: 14 c.matches = self._get_results(query) 12 c.query = request.params.get('query', '') 13 if c.query: 14 c.matches = self._get_matches(c.query) 15 c.results = self._get_results(c.matches) 15 16 c.total = c.matches.get_matches_estimated() 16 17 else: … … 18 19 return render('search/index') 19 20 20 def _get_ results(self, query):21 def _get_matches(self, query): 21 22 index = shakespeare.search.SearchIndex.default_index() 22 matches = index.search(query )23 matches = index.search(query, numresults=50) 23 24 return matches 24 25 25 26 def _get_results(self, matches): 27 results = [] 28 for m in matches: 29 text, lineno = self._match_to_text(m) 30 if text: 31 # slight hack -- just attach direct to object 32 text._lineno = lineno 33 text._snippet = m.document.get_data() 34 results.append(text) 35 else: 36 # TODO: create a dummy text ... 37 pass 38 return results 39 40 def _match_to_text(self, m): 41 item_id = m.document.get_value(shakespeare.search.ITEM_ID) 42 text = model.Material.byName(item_id) 43 lineno = m.document.get_value(shakespeare.search.LINE_NO) 44 return (text, lineno) 45 -
trunk/shakespeare/search.py
r189 r191 36 36 import xapian 37 37 38 # keys for document values 39 ITEM_ID = 0 40 LINE_NO = 1 41 38 42 class SearchIndex(object): 39 43 def __init__(self, index_dir): … … 58 62 return SearchIndex(index_dir) 59 63 60 def add_item(self, fileobj ):61 d ocument = xapian.WritableDatabase(self.index_dir, xapian.DB_CREATE_OR_OPEN)64 def add_item(self, fileobj, item_id=None): 65 database = xapian.WritableDatabase(self.index_dir, xapian.DB_CREATE_OR_OPEN) 62 66 indexer = xapian.TermGenerator() 63 67 stemmer = xapian.Stem("english") … … 66 70 para = '' 67 71 try: 72 count = -1 73 para_start = 0 68 74 for line in fileobj: 75 count += 1 69 76 line = line.strip() 70 77 if line == '': … … 72 79 doc = xapian.Document() 73 80 doc.set_data(para) 81 id_term = 'I' + str(item_id) 82 doc.add_term(id_term) 83 doc.add_value(ITEM_ID, str(item_id)) 84 doc.add_value(LINE_NO, str(para_start)) 74 85 75 86 indexer.set_document(doc) … … 77 88 indexer.index_text(para) 78 89 79 # Add the document to the database.80 document.add_document(doc)90 database.add_document(doc) 91 # assume next para starts 81 92 para = '' 93 # must come after 94 para_start = count 82 95 else: 83 96 if para != '': … … 86 99 except StopIteration: 87 100 # TODO: what is happening here? 88 pass101 raise 89 102 90 def search(self, query_string): 91 # Open the database for searching. 103 def get_database(self): 92 104 database = xapian.Database(self.index_dir) 105 return database 93 106 94 # Start an enquire session. 
107 def search(self, query_string, offset=0, numresults=10): 108 database = self.get_database() 95 109 enquire = xapian.Enquire(database) 96 97 # Parse the query string to produce a Xapian::Query object.98 110 qp = xapian.QueryParser() 99 111 stemmer = xapian.Stem("english") … … 102 114 qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME) 103 115 query = qp.parse_query(query_string) 104 print "Parsed query is: %s" % query.get_description()105 106 # Find the top 10 results for the query.107 116 enquire.set_query(query) 108 # get search results offset, offset+count 109 offset = 0 110 count = 10 111 matches = enquire.get_mset(offset, count) 117 matches = enquire.get_mset(offset, numresults) 112 118 return matches 113 119 … … 126 132 works = [ path ] 127 133 for work in works: 128 if self.verbose:129 print 'Processing %s' % work130 134 fileobj = open(work) 131 self. index.add_item(fileobj)135 self.add_item(fileobj) 132 136 133 137 @classmethod … … 144 148 msg += '\n' 145 149 return msg 146 147 150 -
trunk/shakespeare/templates/search/index.html
r181 r191 11 11 12 12 <div class="search-results" py:if="c.matches is not None"> 13 <h3>Search Results</h3> 13 <h3>Search Results For: ${c.query}</h3> 14 14 There were ${c.total} results. 15 15 <ul> 16 <li py:for="m in c.matches"> 17 ${m.document.get_data()} 16 <li py:for="m in c.results"> 17 Work: ${m.title}, Line: ${m._lineno} 18 <blockquote> 19 <pre>${m._snippet}</pre> 20 </blockquote> 18 21 </li> 19 22 </ul> -
trunk/shakespeare/tests/functional/test_search.py
r189 r191 11 11 text = make_fixture() 12 12 sindex = shakespeare.search.SearchIndex.default_index() 13 sindex.add_item(StringIO.StringIO(text.content)) 13 sindex.add_item(StringIO.StringIO(text.content), text.name) 14 14 15 15 def test_index(self): … … 24 24 form['query'] = 'summer' 25 25 res = form.submit() 26 # print res 26 27 assert 'Search Results' in res 27 28 assert 'Shall I compare thee' in res -
trunk/shakespeare/tests/test_search.py
r189 r191 5 5 6 6 import shakespeare.search 7 import shakespeare.tests 7 8 8 9 class TestSearch: 9 # break up a little to make indexing more interesting10 text = \11 '''12 Shall I compare thee to a summer's day?13 Thou art more lovely and more temperate:14 Rough winds do shake the darling buds of May,15 And summer's lease hath all too short a date:16 17 Sometime too hot the eye of heaven shines,18 And often is his gold complexion dimm'd,19 And every fair from fair sometime declines,20 By chance, or nature's changing course untrimm'd:21 22 But thy eternal summer shall not fade,23 Nor lose possession of that fair thou ow'st,24 Nor shall death brag thou wander'st in his shade,25 When in eternal lines to time thou grow'st,26 27 So long as men can breathe, or eyes can see,28 So long lives this, and this gives life to thee.29 '''30 31 10 def setUp(self): 11 self.text = shakespeare.tests.make_fixture() 32 12 basetmp = tempfile.gettempdir() 33 13 self.tmpdir = os.path.join(basetmp, 'openshkspr-search') … … 37 17 os.makedirs(self.tmpdir) 38 18 self.index = shakespeare.search.SearchIndex(self.tmpdir) 19 self.index.add_item(StringIO.StringIO(self.text.content), 20 self.text.name) 21 22 # TODO: remove the document from the index 39 23 40 24 def test_add_item(self): 41 self.index.add_item(StringIO.StringIO(self.text))25 assert self.index.get_database().get_doccount() > 0 42 26 43 27 def test_search(self): 44 self.index.add_item(StringIO.StringIO(self.text))45 28 out = self.index.search('summer') 46 29 assert len(out) == 2 … … 51 34 out = self.index.search('rough') 52 35 assert len(out) == 1 36 37 def test_retrieve_lineno(self): 38 out = self.index.search('summer') 39 mset1 = out[1] 40 lineno = mset1.document.get_value(shakespeare.search.LINE_NO) 41 assert lineno == '9' 53 42 43 def test_retrieve_itemid(self): 44 out = self.index.search('summer') 45 mset1 = out[1] 46 name = mset1.document.get_value(shakespeare.search.ITEM_ID) 47 assert name == self.text.name 48
