Changeset 169
- Timestamp:
- 07/19/08 18:43:05 (12 months ago)
- Location:
- trunk
- Files:
-
- 1 added
- 3 modified
- 1 copied
-
shakespeare.egg-info/paste_deploy_config.ini_tmpl (modified) (1 diff)
-
shakespeare/__init__.py (modified) (3 diffs)
-
shakespeare/cli.py (modified) (3 diffs)
-
shakespeare/search.py (copied) (copied from milton/textsearch.py) (1 diff)
-
shakespeare/tests/search_test.py (added)
Legend:
- Unmodified
- Added
- Removed
-
trunk/shakespeare.egg-info/paste_deploy_config.ini_tmpl
r155 r169 17 17 # 18 18 # At present should be different from the app's cache_dir 19 cachedir = ./cache 19 cachedir = cache 20 21 # Directory for Xapian search index 22 search_index_dir = searchindex 23 20 24 21 25 [server:main] -
trunk/shakespeare/__init__.py
r155 r169 56 56 57 57 58 2. Setup Package 59 ================ 58 Getting Started 59 *************** 60 60 61 Make a config file as follows:: 61 As a user: 62 ========== 62 63 63 paster make-config shakespeare config.ini 64 1. Basic setup 65 -------------- 66 67 To access most of the main features of Open Shakespeare you need a database. 68 For this and other bits and bobs of configuration you will need a configuration 69 file. 70 71 You can make a config file as follows:: 72 73 paster make-config shakespeare {your-config.ini} 64 74 65 75 Tweak the config file as appropriate and then setup the application:: 66 76 67 77 paster setup-app config.ini 68 69 70 3. Initialize the system 71 ======================== 78 79 [TODO: this should be part of setup-app] 72 80 73 81 Run:: … … 76 84 $ shakespeare-admin db init 77 85 78 If you want to build the concordance do:: 86 2. Extras 87 --------- 79 88 80 $ shakespeare-admin concordance 89 1. Search index. [TODO] 81 90 82 NB: This may take some time to run so be patient. TIP: using sqlite building 83 the concordance really **does** seem to run forever so recommend using 84 postgresql or mysql if you are going to build the concordance. 85 86 87 Getting Started 88 *************** 89 90 As a user: 91 ========== 92 93 Start up the web interface by running the webserver:: 91 2. You can start a web server to provide an easy-to-use web interface to the 92 shakespeare material and facilities by doing:: 94 93 95 94 $ paster serve {your-config.ini} … … 102 101 =============== 103 102 104 0. Copy development.ini.tmpl to development.ini and edit to your taste. 103 0. Setup 104 -------- 105 105 106 1. Check out the administrative commands: $ bin/shakespeare-admin help. 106 Follow the basic steps above but with an ini file named: development.ini 107 108 NB: you'll probably want to change log levels to debug. 109 110 1. Check out the administrative commands 111 ---------------------------------------- 112 113 $ bin/shakespeare-admin help. 
107 114 108 115 2. Run the tests using either py.test or nosetests:: 116 ---------------------------------------------------- 109 117 110 118 $ nosetests shakespeare -
trunk/shakespeare/cli.py
r155 r169 9 9 TODO: self.verbose option and associated self._print 10 10 """ 11 12 def __init__(self, verbose=False): 13 # cmd.Cmd is not a new style class 14 cmd.Cmd.__init__(self) 15 self.verbose = verbose 11 16 12 17 prompt = 'The Bard > ' … … 195 200 print 'Information about this package.' 196 201 202 def do_search_add(self, line=None): 203 path = line.strip() 204 if not os.path.exists(path): 205 print '"%s" is not an existent path' % path 206 return 1 207 if os.path.isdir(path): 208 fns = os.listdir(path) 209 fns = filter(lambda x: x.endswith('.txt'), fns) 210 works = [ os.path.join(path, fn) for fn in fns ] 211 else: 212 works = [ path ] 213 import shakespeare.search 214 index = shakespeare.search.SearchIndex.default_index() 215 for work in works: 216 if self.verbose: 217 print 'Processing %s' % work 218 fileobj = open(work) 219 index.add_item(fileobj) 220 221 def help_search_add(self, line=None): 222 info = '''search_add {path} 223 224 Add contents of {path} (file itself or all text files in directory if 225 directory) to the search index.''' 226 print info 227 228 def do_search_add_all(self): 229 # TODO: automatically add all texts listed in index 230 pass 231 232 def do_search(self, line=None): 233 import shakespeare.search 234 index = shakespeare.search.SearchIndex.default_index() 235 query = line.strip() 236 if not query: 237 print 'No search term supplied.' 238 return 1 239 matches = index.search(query) 240 print "%i results found." % matches.get_matches_estimated() 241 print "Results 1-%i:" % matches.size() 242 243 for m in matches: 244 print 245 print '%i: %i%% docid=%i' % (m.rank + 1, m.percent, m.docid) 246 print m.document.get_data() 247 248 def help_search(self, line=None): 249 info = 'Supply a query with which to search the search index.' 
250 print info 251 197 252 def main(): 198 253 import optparse … … 210 265 return 1 211 266 else: 212 cmd = ShakespeareAdmin( )267 cmd = ShakespeareAdmin(verbose=options.verbose) 213 268 args = ' '.join(args) 214 269 args = args.replace('-','_') -
trunk/shakespeare/search.py
r165 r169 1 # !/usr/bin/env python2 1 # Support for indexing and searching texts using xapian 2 import os 3 3 4 4 import xapian 5 5 6 if len(sys.argv) < 2:7 print >> sys.stderr, "Missing a search term" % sys.argv[0]8 sys.exit(1)6 class SearchIndex(object): 7 def __init__(self, index_dir): 8 self.index_dir = index_dir 9 9 10 try: 11 # Open the database for searching. 12 database = xapian.Database('./index') 10 @classmethod 11 def config_index_dir(self): 12 '''Get the search index directory specified in the config.''' 13 import shakespeare 14 conf = shakespeare.conf() 15 index_dir = conf['search_index_dir'] 16 return index_dir 13 17 14 # Start an enquire session. 15 enquire = xapian.Enquire(database) 18 @classmethod 19 def default_index(self): 20 '''Return a SearchIndex instance initialized with the path specified in 21 the configuration file. 22 ''' 23 index_dir = self.config_index_dir() 24 if not os.path.exists(index_dir): 25 os.makedirs(index_dir) 26 return SearchIndex(index_dir) 16 27 17 # Take the search argument and turn into a Xapian query 18 query_string = sys.argv[1] 19 for arg in sys.argv[2:]: 20 query_string += ' ' 21 query_string += arg 28 def add_item(self, fileobj): 29 # TODO: remove this comment as no longer relevant (?) 30 #create the folder for a writable db: alter path 31 document = xapian.WritableDatabase (self.index_dir, xapian.DB_CREATE_OR_OPEN) 32 indexer = xapian.TermGenerator() 33 stemmer = xapian.Stem("english") 34 indexer.set_stemmer(stemmer) 35 36 para = '' 37 try: 38 for line in fileobj: 39 line = line.strip() 40 if line == '': 41 if para != '': 42 doc = xapian.Document() 43 doc.set_data(para) 44 45 indexer.set_document(doc) 46 indexer.index_text(para) 47 48 # Add the document to the database. 49 document.add_document(doc) 50 para = '' 51 else: 52 if para != '': 53 para += ' ' 54 para += line 55 except StopIteration: 56 # TODO: what is happening here? 
57 pass 58 print Stopped 59 60 def search(self, query_string): 61 # Open the database for searching. 62 database = xapian.Database(self.index_dir) 63 64 # Start an enquire session. 65 enquire = xapian.Enquire(database) 22 66 23 67 # Parse the query string to produce a Xapian::Query object. 24 qp = xapian.QueryParser()25 stemmer = xapian.Stem("english")26 qp.set_stemmer(stemmer)27 qp.set_database(database)28 qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)29 query = qp.parse_query(query_string)30 print "Parsed query is: %s" % query.get_description()68 qp = xapian.QueryParser() 69 stemmer = xapian.Stem("english") 70 qp.set_stemmer(stemmer) 71 qp.set_database(database) 72 qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME) 73 query = qp.parse_query(query_string) 74 print "Parsed query is: %s" % query.get_description() 31 75 32 # Find the top 10 results for the query. 33 enquire.set_query(query) 34 matches = enquire.get_mset(0, 10) 76 # Find the top 10 results for the query. 77 enquire.set_query(query) 78 matches = enquire.get_mset(0, 10) 79 return matches 35 80 81 @classmethod 82 def print_matches(self, matches): 83 # Display the results. 84 print "%i results found." % matches.get_matches_estimated() 85 print "Results 1-%i:" % matches.size() 36 86 37 # Display the results. 38 print "%i results found." % matches.get_matches_estimated() 39 print "Results 1-%i:" % matches.size() 87 for m in matches: 88 print "%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, m.document.get_data()) 40 89 41 for m in matches:42 print "%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, m.document.get_data())43 44 except Exception, e:45 print >> sys.stderr, "Exception: %s" % str(e)46 sys.exit(1)47
