Changeset 163
- Timestamp:
- 06/08/08 13:45:15 (4 months ago)
- Files:
-
- milton/textindex.py (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
milton/textindex.py
Revision 162 | Revision 163
1 import xapian | 1 import xapian
2 import string | 2 import string
3 import os | 3 import os
4 import milton | 4 import milton
 | 5 import glob
5 | 6
6 works = ('C:\\texts\\paradiselost.txt') |
7 | 7
8 #create the folder for a writable db | 8 #create the folder for a writable db: alter path
9 document = xapian.WritableDatabase (' .\\index', xapian.DB_CREATE_OR_OPEN) | 9 document = xapian.WritableDatabase ('C:\\index', xapian.DB_CREATE_OR_OPEN)
10 | 10
11 indexer = xapian.TermGenerator() | 11 indexer = xapian.TermGenerator()
12 stemmer = xapian.Stem("english") | 12 stemmer = xapian.Stem("english")
13 indexer.set_stemmer(stemmer) | 13 indexer.set_stemmer(stemmer)
14 # throws IO Error: Permission denied on ./cache: Hardcode Milton paths | 14 # Path needs to be changed to data path for shakespeare
15 texts = open (works) | 15 works= glob.glob('c:\\texts\\*.txt')
16 | 16
 | 17 for texts in works:
 | 18 f= open(texts)
17 para = '' | 19 para = ''
18 try: | 20 try:
19 for line in texts: | 21
 | 22 for line in f:
20 line = string.strip(line) | 23 line = string.strip(line)
21 if line == '': | 24 if line == '':
22 if para != '': | 25 if para != '':
23 | 26
24 doc = xapian.Document() | 27 doc = xapian.Document()
25 doc.set_data(para) | 28 doc.set_data(para)
26 | 29
27 indexer.set_document(doc) | 30 indexer.set_document(doc)
28 indexer.index_text(para) | 31 indexer.index_text(para)
29 | 32
30 # Add the document to the database. | 33 # Add the document to the database.
31 document.add_document(doc) | 34 document.add_document(doc)
32 para = '' | 35 para = ''
33 else: | 36 else:
34 if para != '': | 37 if para != '':
35 para += ' ' | 38 para += ' '
36 para += line | 39 para += line
37 except StopIteration: | 40 except StopIteration:
38 pass | 41 pass
39 print Stopped | 42 print Stopped
