Changeset 164
- Timestamp: 07/17/08 21:00:32 (5 months ago)
- Files:
  - milton/textindex.py (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
milton/textindex.py
--- milton/textindex.py (Revision 163)
+++ milton/textindex.py (Revision 164)
@@ -1,42 +1,42 @@
 import xapian
 import string
 import os
 import milton
 import glob
 
 
 #create the folder for a writable db: alter path
-document = xapian.WritableDatabase (' C:\\index', xapian.DB_CREATE_OR_OPEN)
+document = xapian.WritableDatabase ('./index', xapian.DB_CREATE_OR_OPEN)
 
 indexer = xapian.TermGenerator()
 stemmer = xapian.Stem("english")
 indexer.set_stemmer(stemmer)
 # Path needs to be changed to data path for shakespeare
-works= glob.glob(' c:\\texts\\*.txt')
+works= glob.glob('./data/texts/*.txt')
 
 for texts in works:
     f= open(texts)
     para = ''
     try:
 
         for line in f:
             line = string.strip(line)
             if line == '':
                 if para != '':
 
                     doc = xapian.Document()
                     doc.set_data(para)
 
                     indexer.set_document(doc)
                     indexer.index_text(para)
 
                     # Add the document to the database.
                     document.add_document(doc)
                     para = ''
             else:
                 if para != '':
                     para += ' '
                 para += line
     except StopIteration:
         pass
         print Stopped
