Changeset 169

Show
Ignore:
Timestamp:
07/19/08 19:43:05 (5 months ago)
Author:
rgrp
Message:

[shakespeare/search][l]: create search package with SearchIndex object + cli interface as first part of integration of xapian search support developed in Iain (next part is WUI).

* search.py (+ test): SearchIndex class with add_item, search, default_index methods.

* paste_deploy_config.ini_tmpl: new config variable search_index_dir

* cli.py: search methods (search_add, search)

  • also support for verbose option

* __init__.py: refactor main docs (in docstring)

  • make clearer by having separate 'getting started' sections for users and developers
  • remove mention of concordance and put in todo about search index
Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/shakespeare.egg-info/paste_deploy_config.ini_tmpl

    Revision 155 Revision 169
    1# 1# 
    2# shakespeare - Pylons configuration 2# shakespeare - Pylons configuration 
    3# 3# 
    4# The %(here)s variable will be replaced with the parent directory of this file 4# The %(here)s variable will be replaced with the parent directory of this file 
    5# 5# 
    6[DEFAULT] 6[DEFAULT] 
    7debug = true 7debug = true 
    8email_to = you@yourdomain.com 8email_to = you@yourdomain.com 
    9smtp_server = localhost 9smtp_server = localhost 
    10error_email_from = paste@localhost 10error_email_from = paste@localhost 
    11 11 
    12 12 
    13# Cache directory where cached copies of downloaded materials can be stored 13# Cache directory where cached copies of downloaded materials can be stored 
    14#  14#  
    15# This directory needs to be semi-permanent so do *not* put under a location 15# This directory needs to be semi-permanent so do *not* put under a location 
    16# such as /tmp.  16# such as /tmp.  
    17#  17#  
    18# At present should be different from the app's cache_dir 18# At present should be different from the app's cache_dir 
    19cachedir = ./cache  19 cachedir = cache 
       20  
       21 # Directory for Xapian search index 
       22 search_index_dir = searchindex 
       23  
    20 24 
    21[server:main] 25[server:main] 
    22use = egg:Paste#http 26use = egg:Paste#http 
    23host = 0.0.0.0 27host = 0.0.0.0 
    24port = 5000 28port = 5000 
    25 29 
    26[app:main] 30[app:main] 
    27use = egg:shakespeare 31use = egg:shakespeare 
    28full_stack = true 32full_stack = true 
    29cache_dir = %(here)s/data 33cache_dir = %(here)s/data 
    30beaker.session.key = shakespeare 34beaker.session.key = shakespeare 
    31beaker.session.secret = ${app_instance_secret} 35beaker.session.secret = ${app_instance_secret} 
    32app_instance_uuid = ${app_instance_uuid} 36app_instance_uuid = ${app_instance_uuid} 
    33 37 
    34# If you'd like to fine-tune the individual locations of the cache data dirs 38# If you'd like to fine-tune the individual locations of the cache data dirs 
    35# for the Cache data, or the Session saves, un-comment the desired settings 39# for the Cache data, or the Session saves, un-comment the desired settings 
    36# here: 40# here: 
    37#beaker.cache.data_dir = %(here)s/data/cache 41#beaker.cache.data_dir = %(here)s/data/cache 
    38#beaker.session.data_dir = %(here)s/data/sessions 42#beaker.session.data_dir = %(here)s/data/sessions 
    39 43 
    40# WARNING: *THE LINE BELOW MUST BE UNCOMMENTED ON A PRODUCTION ENVIRONMENT* 44# WARNING: *THE LINE BELOW MUST BE UNCOMMENTED ON A PRODUCTION ENVIRONMENT* 
    41# Debug mode will enable the interactive debugging tool, allowing ANYONE to 45# Debug mode will enable the interactive debugging tool, allowing ANYONE to 
    42# execute malicious code after an exception is raised. 46# execute malicious code after an exception is raised. 
    43set debug = false 47set debug = false 
    44 48 
    45# using sqlite in memory leads to thread issues when using db ... 49# using sqlite in memory leads to thread issues when using db ... 
    46# sqlobject.dburi = sqlite:///:memory: 50# sqlobject.dburi = sqlite:///:memory: 
    47sqlobject.dburi = postgres://<username>:<password>@localhost/<your-dbname> 51sqlobject.dburi = postgres://<username>:<password>@localhost/<your-dbname> 
    48 52 
    49# Logging configuration 53# Logging configuration 
    50[loggers] 54[loggers] 
    51keys = root 55keys = root 
    52 56 
    53[handlers] 57[handlers] 
    54keys = console 58keys = console 
    55 59 
    56[formatters] 60[formatters] 
    57keys = generic 61keys = generic 
    58 62 
    59[logger_root] 63[logger_root] 
    60level = INFO 64level = INFO 
    61handlers = console 65handlers = console 
    62 66 
    63[handler_console] 67[handler_console] 
    64class = StreamHandler 68class = StreamHandler 
    65args = (sys.stderr,) 69args = (sys.stderr,) 
    66level = NOTSET 70level = NOTSET 
    67formatter = generic 71formatter = generic 
    68 72 
    69[formatter_generic] 73[formatter_generic] 
    70format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s 74format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s 
    71 75 
    72 76 
    73[misc] 77[misc] 
    74# directory where we can store all local copies of texts 78# directory where we can store all local copies of texts 
    75cachedir = ./cache 79cachedir = ./cache 
    76 80 
    77[db] 81[db] 
    78# sqlobject database uri. see sqlobject documentation for details 82# sqlobject database uri. see sqlobject documentation for details 
    79# uri = postgres://user:pass@host/dbname 83# uri = postgres://user:pass@host/dbname 
    80uri = sqlite:/:memory: 84uri = sqlite:/:memory: 
    81 85 
    82[web] 86[web] 
    83# directory where the templates used by web front end are kept 87# directory where the templates used by web front end are kept 
    84template_dir = ./src/shakespeare/template 88template_dir = ./src/shakespeare/template 
    85 89 
    86[annotater] 90[annotater] 
    87# url at which marginalia files (css/js etc) should be mounted 91# url at which marginalia files (css/js etc) should be mounted 
    88marginalia_prefix = /marginalia 92marginalia_prefix = /marginalia 
  • trunk/shakespeare/__init__.py

    Revision 155 Revision 169
    1''' 1''' 
    2Introduction 2Introduction 
    3************ 3************ 
    4 4 
    5The Open Shakespeare package provides a full open set of shakespeare's works 5The Open Shakespeare package provides a full open set of shakespeare's works 
    6(often in multiple versions) along with ancillary material, a variety of tools 6(often in multiple versions) along with ancillary material, a variety of tools 
    7and a python API. 7and a python API. 
    8 8 
    9Specifically in addition to the works themselves (often in multiple versions) 9Specifically in addition to the works themselves (often in multiple versions) 
    10there is an introduction, a chronology, explanatory notes, a concordance and 10there is an introduction, a chronology, explanatory notes, a concordance and 
    11search facilities. 11search facilities. 
    12 12 
    13All material is open source/open knowledge so that anyone can use, redistribute 13All material is open source/open knowledge so that anyone can use, redistribute 
    14and reuse these materials freely. For exact details of the license under which 14and reuse these materials freely. For exact details of the license under which 
    15this package is made available please see COPYING.txt. 15this package is made available please see COPYING.txt. 
    16 16 
    17Open Shakespeare has been developed under the aegis of the Open Knowledge 17Open Shakespeare has been developed under the aegis of the Open Knowledge 
    18Foundation (http://www.okfn.org/). 18Foundation (http://www.okfn.org/). 
    19 19 
    20Contact the Project 20Contact the Project 
    21******************* 21******************* 
    22 22 
    23Please mail info@okfn.org or join the okfn-discuss mailing list: 23Please mail info@okfn.org or join the okfn-discuss mailing list: 
    24 24 
    25  http://lists.okfn.org/listinfo/okfn-discuss 25  http://lists.okfn.org/listinfo/okfn-discuss 
    26 26 
    27 27 
    28Installation and Setup 28Installation and Setup 
    29********************** 29********************** 
    30 30 
    311. Install the code 311. Install the code 
    32=================== 32=================== 
    33 33 
    341.1: (EITHER) Install using setup.py (preferred) 341.1: (EITHER) Install using setup.py (preferred) 
    35------------------------------------------------ 35------------------------------------------------ 
    36 36 
    37Install ``shakespeare`` using easy_install:: 37Install ``shakespeare`` using easy_install:: 
    38 38 
    39    easy_install shakespeare 39    easy_install shakespeare 
    40 40 
    41NB: If you don't have easy_install you can get from here: 41NB: If you don't have easy_install you can get from here: 
    42 42 
    43<http://peak.telecommunity.com/DevCenter/EasyInstall#installation-instructions> 43<http://peak.telecommunity.com/DevCenter/EasyInstall#installation-instructions> 
    44 44 
    45 45 
    461.2 (OR) Get the code straight from subversion 461.2 (OR) Get the code straight from subversion 
    47------------------------------------------------ 47------------------------------------------------ 
    48 48 
    491. Check out the subversion trunk:: 491. Check out the subversion trunk:: 
    50 50 
    51    svn co https://knowledgeforge.net/shakespeare/svn/trunk 51    svn co https://knowledgeforge.net/shakespeare/svn/trunk 
    52 52 
    532. Do:: 532. Do:: 
    54 54 
    55    sudo python setup.py develop 55    sudo python setup.py develop 
    56 56 
    57 57 
    582. Setup Package 58Getting Started 
    59================ 59*************** 
    60 60 
    61Make a config file as follows::  61 As a user: 
       62 ========== 
    62 63 
    63    paster make-config shakespeare config.ini  64 1. Basic setup 
       65 -------------- 
       66  
       67 To access most of the main features of Open Shakespeare you need a database. 
       68 For this an other bits and bobs of configuration you will need a configuration 
       69 file. 
       70  
       71 You can make a config file as follows:: 
       72  
       73     paster make-config shakespeare {your-config.ini} 
    64 74 
    65Tweak the config file as appropriate and then setup the application:: 75Tweak the config file as appropriate and then setup the application:: 
    66 76 
    67    paster setup-app config.ini 77    paster setup-app config.ini 
    68 78  
    69 79[TODO: this should be part of setup-app] 
    703. Initialize the system   
    71========================   
    72 80 
    73Run:: 81Run:: 
    74 82 
    75    $ shakespeare-admin db create 83    $ shakespeare-admin db create 
    76    $ shakespeare-admin db init 84    $ shakespeare-admin db init 
    77 85 
    78If you want to build the concordance do::  86 2. Extras 
       87 --------- 
    79 88 
    80    $ shakespeare-admin concordance 891. Search index. [TODO] 
    81 90 
    82NB: This may take some time to run so be patient. TIP: using sqlite building 912. You can start a web server to provide an easy-to-use web interface to the 
    83the concordance really **does** seem to run forever so recommend using 92shakespeare material and facilities by doing:: 
    84postgresql or mysql if you are going to build the concordance.    
    85   
    86   
    87Getting Started   
    88***************   
    89   
    90As a user:   
    91==========   
    92   
    93Start up the web interface by running the webserver::   
    94 93 
    95    $ paster serve {your-config.ini} 94    $ paster serve {your-config.ini} 
    96 95 
    97NB: {your-config.ini} should be replaced with the name of the config file you 96NB: {your-config.ini} should be replaced with the name of the config file you 
    98created earlier. 97created earlier. 
    99 98 
    100 99 
    101As a developer: 100As a developer: 
    102=============== 101=============== 
    103 102 
    1040. Copy development.ini.tmpl to development.ini and edit to your taste.  103 0. Setup 
       104 -------- 
    105 105 
    1061. Check out the administrative commands: $ bin/shakespeare-admin help.  106 Follow the basic steps above but with an ini file named: development.ini 
       107  
       108 NB: you'll probably want to change log levels to debug. 
       109  
       110 1. Check out the administrative commands 
       111 ---------------------------------------- 
       112  
       113     $ bin/shakespeare-admin help. 
    107 114 
    1082. Run the tests using either py.test or nosetests:: 1152. Run the tests using either py.test or nosetests:: 
      116---------------------------------------------------- 
    109 117 
    110    $ nosetests shakespeare 118    $ nosetests shakespeare 
    111''' 119''' 
    112__version__ = '0.5dev' 120__version__ = '0.5dev' 
    113__application_name__ = 'shakespeare' 121__application_name__ = 'shakespeare' 
    114 122 
    115def conf(): 123def conf(): 
    116    import os 124    import os 
    117    defaultPath = os.path.abspath('./development.ini') 125    defaultPath = os.path.abspath('./development.ini') 
    118    envVarName = __application_name__.upper() + 'CONF' 126    envVarName = __application_name__.upper() + 'CONF' 
    119    confPath = os.environ.get(envVarName, defaultPath) 127    confPath = os.environ.get(envVarName, defaultPath) 
    120    if not os.path.exists(confPath): 128    if not os.path.exists(confPath): 
    121        raise ValueError('No Configuration file exists at: %s' % confPath) 129        raise ValueError('No Configuration file exists at: %s' % confPath) 
    122 130 
    123    # register the config 131    # register the config 
    124    import paste.deploy 132    import paste.deploy 
    125    import shakespeare.config.environment 133    import shakespeare.config.environment 
    126    pasteconf = paste.deploy.appconfig('config:' + confPath) 134    pasteconf = paste.deploy.appconfig('config:' + confPath) 
    127 135 
    128    shakespeare.config.environment.load_environment(pasteconf.global_conf, 136    shakespeare.config.environment.load_environment(pasteconf.global_conf, 
    129        pasteconf.local_conf) 137        pasteconf.local_conf) 
    130    from pylons import config 138    from pylons import config 
    131    conf = config 139    conf = config 
    132 140 
    133    # import ConfigParser 141    # import ConfigParser 
    134    # conf = ConfigParser.SafeConfigParser() 142    # conf = ConfigParser.SafeConfigParser() 
    135    # conf.read(confPath) 143    # conf.read(confPath) 
    136 144 
    137    return conf 145    return conf 
    138      146      
  • trunk/shakespeare/cli.py

    Revision 155 Revision 169
    1#!/usr/bin/env python 1#!/usr/bin/env python 
    2 2 
    3import cmd 3import cmd 
    4import os 4import os 
    5import StringIO 5import StringIO 
    6 6 
    7class ShakespeareAdmin(cmd.Cmd): 7class ShakespeareAdmin(cmd.Cmd): 
    8    """ 8    """ 
    9    TODO: self.verbose option and associated self._print 9    TODO: self.verbose option and associated self._print 
    10    """ 10    """ 
      11 
      12    def __init__(self, verbose=False): 
      13        # cmd.Cmd is not a new style class 
      14        cmd.Cmd.__init__(self) 
      15        self.verbose = verbose 
    11 16 
    12    prompt = 'The Bard > ' 17    prompt = 'The Bard > ' 
    13 18 
    14    def run_interactive(self, line=None): 19    def run_interactive(self, line=None): 
    15        """Run an interactive session. 20        """Run an interactive session. 
    16        """ 21        """ 
    17        print 'Welcome to shakespeare-admin interactive mode\n' 22        print 'Welcome to shakespeare-admin interactive mode\n' 
    18        self.do_about() 23        self.do_about() 
    19        print 'Type:  "?" or "help" for help on commands.\n' 24        print 'Type:  "?" or "help" for help on commands.\n' 
    20        while 1: 25        while 1: 
    21            try: 26            try: 
    22                self.cmdloop() 27                self.cmdloop() 
    23                break 28                break 
    24            except KeyboardInterrupt: 29            except KeyboardInterrupt: 
    25                raise 30                raise 
    26 31 
    27    def do_help(self, line=None): 32    def do_help(self, line=None): 
    28        cmd.Cmd.do_help(self, line) 33        cmd.Cmd.do_help(self, line) 
    29 34 
    30    def do_about(self, line=None): 35    def do_about(self, line=None): 
    31        import shakespeare 36        import shakespeare 
    32        version = shakespeare.__version__ 37        version = shakespeare.__version__ 
    33        about = \ 38        about = \ 
    34'''Open Shakespeare version %s. Copyright the Open Knowledge Foundation. 39'''Open Shakespeare version %s. Copyright the Open Knowledge Foundation. 
    35Open Shakespeare is open-knowledge and open-source. See COPYING for details. 40Open Shakespeare is open-knowledge and open-source. See COPYING for details. 
    36 41 
    37For more information about the package run `info`. 42For more information about the package run `info`. 
    38''' % version 43''' % version 
    39        print about 44        print about 
    40 45 
    41    def do_quit(self, line=None): 46    def do_quit(self, line=None): 
    42        sys.exit() 47        sys.exit() 
    43 48 
    44    def do_EOF(self, *args): 49    def do_EOF(self, *args): 
    45        print '' 50        print '' 
    46        sys.exit() 51        sys.exit() 
    47 52 
    48    # ================= 53    # ================= 
    49    # Commands 54    # Commands 
    50 55 
    51    def do_db(self, line=None): 56    def do_db(self, line=None): 
    52        actions = [ 'create', 'clean', 'rebuild', 'init' ] 57        actions = [ 'create', 'clean', 'rebuild', 'init' ] 
    53        if line is None or line not in actions: 58        if line is None or line not in actions: 
    54            self.help_db() 59            self.help_db() 
    55            return 1 60            return 1 
    56        import shakespeare.model 61        import shakespeare.model 
    57        if line == 'init': 62        if line == 'init': 
    58            import pkg_resources 63            import pkg_resources 
    59            pkg = 'shksprdata' 64            pkg = 'shksprdata' 
    60            meta = pkg_resources.resource_stream(pkg, 'texts/metadata.txt') 65            meta = pkg_resources.resource_stream(pkg, 'texts/metadata.txt') 
    61            shakespeare.model.Material.load_from_metadata(meta) 66            shakespeare.model.Material.load_from_metadata(meta) 
    62        else: 67        else: 
    63            shakespeare.model.__dict__[line+'db']() 68            shakespeare.model.__dict__[line+'db']() 
    64 69 
    65    def help_db(self, line=None): 70    def help_db(self, line=None): 
    66        usage = \ 71        usage = \ 
    67'''db { create | clean | rebuild | init } 72'''db { create | clean | rebuild | init } 
    68''' 73''' 
    69        print usage 74        print usage 
    70     75     
    71    def do_gutenberg(self, line=None): 76    def do_gutenberg(self, line=None): 
    72        import shakespeare.gutenberg 77        import shakespeare.gutenberg 
    73        helper = shakespeare.gutenberg.Helper(verbose=True) 78        helper = shakespeare.gutenberg.Helper(verbose=True) 
    74        if not line: 79        if not line: 
    75            helper.execute() 80            helper.execute() 
    76        elif line == 'print_index': 81        elif line == 'print_index': 
    77            import pprint 82            import pprint 
    78            pprint.pprint(helper.get_index()) 83            pprint.pprint(helper.get_index()) 
    79        else: 84        else: 
    80            msg = 'Unknown argument %s' % line 85            msg = 'Unknown argument %s' % line 
    81            raise Exception(msg) 86            raise Exception(msg) 
    82 87 
    83    def help_gutenberg(self, line=None): 88    def help_gutenberg(self, line=None): 
    84        usage = \ 89        usage = \ 
    85""" 90""" 
    86Download and process all Project Gutenberg shakespeare texts""" 91Download and process all Project Gutenberg shakespeare texts""" 
    87        print usage  92        print usage  
    88 93 
    89    def do_moby(self, line=None): 94    def do_moby(self, line=None): 
    90        import shakespeare.moby 95        import shakespeare.moby 
    91        helper = shakespeare.moby.Helper(verbose=True) 96        helper = shakespeare.moby.Helper(verbose=True) 
    92        if not line: 97        if not line: 
    93            helper.execute() 98            helper.execute() 
    94        elif line == 'print_index': 99        elif line == 'print_index': 
    95            import pprint 100            import pprint 
    96            pprint.pprint(helper.get_index()) 101            pprint.pprint(helper.get_index()) 
    97        else: 102        else: 
    98            msg = 'Unknown argument %s' % line 103            msg = 'Unknown argument %s' % line 
    99            raise Exception(msg) 104            raise Exception(msg) 
    100 105 
    101    def help_moby(self, line=None): 106    def help_moby(self, line=None): 
    102        usage = \ 107        usage = \ 
    103''' 108''' 
    104Download and process all Moby/Bosak shakespeare texts''' 109Download and process all Moby/Bosak shakespeare texts''' 
    105        print usage  110        print usage  
    106 111 
    107    def _init_index(self): 112    def _init_index(self): 
    108        import shakespeare.index 113        import shakespeare.index 
    109        self._index = shakespeare.index.all 114        self._index = shakespeare.index.all 
    110 115 
    111    def _filter_index(self, line): 116    def _filter_index(self, line): 
    112        """Filter items in index return only those whose id (url) is in line 117        """Filter items in index return only those whose id (url) is in line 
    113        If line is empty or None return all items 118        If line is empty or None return all items 
    114        """ 119        """ 
    115        if line: 120        if line: 
    116            textsToAdd = [] 121            textsToAdd = [] 
    117            textNames = line.split() 122            textNames = line.split() 
    118            for item in self._index: 123            for item in self._index: 
    119                if item.name in textNames: 124                if item.name in textNames: 
    120                    textsToAdd.append(item) 125                    textsToAdd.append(item) 
    121            return textsToAdd 126            return textsToAdd 
    122        else: 127        else: 
    123            self._init_index() 128            self._init_index() 
    124            return self._index 129            return self._index 
    125     130     
    126    def do_index(self, line): 131    def do_index(self, line): 
    127        self._init_index() 132        self._init_index() 
    128        header = \ 133        header = \ 
    129'''          +-------------------+ 134'''          +-------------------+ 
    130          | Index of Material | 135          | Index of Material | 
    131          +-------------------+ 136          +-------------------+ 
    132 137 
    133''' 138''' 
    134        print header 139        print header 
    135        for row in self._index: 140        for row in self._index: 
    136            print row.name.ljust(35), row.title 141            print row.name.ljust(35), row.title 
    137 142 
    138    def help_index(self, line=None): 143    def help_index(self, line=None): 
    139        usage = \ 144        usage = \ 
    140'''Print index of Shakespeare texts to stdout''' 145'''Print index of Shakespeare texts to stdout''' 
    141        print usage 146        print usage 
    142 147 
    143    def do_concordance(self, line=None): 148    def do_concordance(self, line=None): 
    144        self._init_index() 149        self._init_index() 
    145        print 'Making concordance (this may take some time ...):' 150        print 'Making concordance (this may take some time ...):' 
    146        from shakespeare.concordance import ConcordanceBuilder 151        from shakespeare.concordance import ConcordanceBuilder 
    147        import time 152        import time 
    148        start = end = 0 153        start = end = 0 
    149        start = time.time() 154        start = time.time() 
    150        cc = ConcordanceBuilder() 155        cc = ConcordanceBuilder() 
    151        textsToAdd = [] 156        textsToAdd = [] 
    152        if line is not None: 157        if line is not None: 
    153            textsToAdd = self._filter_index(line) 158            textsToAdd = self._filter_index(line) 
    154        else: 159        else: 
    155            def gut_non_folio(material): 160            def gut_non_folio(material): 
    156                return '_gut' in material.name and 'gut_f' not in material.name 161                return '_gut' in material.name and 'gut_f' not in material.name 
    157            textsToAdd = filter(gut_non_folio, self._index)  162            textsToAdd = filter(gut_non_folio, self._index)  
    158        for item in textsToAdd: 163        for item in textsToAdd: 
    159            print 'Adding: %s (%s)' % (item.name, item.title) 164            print 'Adding: %s (%s)' % (item.name, item.title) 
    160            cc.add_text(item.name) 165            cc.add_text(item.name) 
    161        end = time.time() 166        end = time.time() 
    162        timetaken = end - start 167        timetaken = end - start 
    163        print 'Finished. Time taken was %ss' % timetaken 168        print 'Finished. Time taken was %ss' % timetaken 
    164 169 
    165    def help_concordance(self, line=None): 170    def help_concordance(self, line=None): 
    166        usage = \ 171        usage = \ 
    167'''Create a concordance 172'''Create a concordance 
    168 173 
    169If no arguments supplied then use all non-folio gutenberg shakespeare texts. 174If no arguments supplied then use all non-folio gutenberg shakespeare texts. 
    170Otherwise arguments should be a space seperated list of work name ids 175Otherwise arguments should be a space seperated list of work name ids 
    171''' 176''' 
    172        print usage 177        print usage 
    173 178 
    174    def do_runserver(self, line=None): 179    def do_runserver(self, line=None): 
    175        self.help_runserver() 180        self.help_runserver() 
    176 181 
    177    def help_runserver(self, line=None): 182    def help_runserver(self, line=None): 
    178        usage = \ 183        usage = \ 
    179'''This command has been DEPRECATED. 184'''This command has been DEPRECATED. 
    180 185 
    181Please use `paster serve` to run a server now, e.g.:: 186Please use `paster serve` to run a server now, e.g.:: 
    182 187 
    183    paster serve <my-config.ini> 188    paster serve <my-config.ini> 
    184''' 189''' 
    185        print usage 190        print usage 
    186     191     
    187    def do_info(self, line=None): 192    def do_info(self, line=None): 
    188        import shakespeare 193        import shakespeare 
    189        info = shakespeare.__doc__ 194        info = shakespeare.__doc__ 
    190        print 195        print 
    191        print '       ## Open Shakespeare ##' 196        print '       ## Open Shakespeare ##' 
    192        print info 197        print info 
    193     198     
    194    def help_info(self, line=None): 199    def help_info(self, line=None): 
    195        print 'Information about this package.' 200        print 'Information about this package.' 
    196 201 
      202    def do_search_add(self, line=None): 
      203        path = line.strip() 
      204        if not os.path.exists(path): 
      205            print '"%s" is not an existent path' % path 
      206            return 1 
      207        if os.path.isdir(path): 
      208            fns = os.listdir(path) 
      209            fns = filter(lambda x: x.endswith('.txt'), fns) 
      210            works = [ os.path.join(path, fn) for fn in fns ] 
      211        else: 
      212            works = [ path ] 
      213        import shakespeare.search 
      214        index = shakespeare.search.SearchIndex.default_index() 
      215        for work in works: 
      216            if self.verbose: 
      217                print 'Processing %s' % work 
      218            fileobj = open(work) 
      219            index.add_item(fileobj) 
      220 
      221    def help_search_add(self, line=None): 
      222        info = '''search_add {path} 
      223 
      224Add contents of {path} (file itself or all text files in directory if 
      225directory) to the search index.''' 
      226        print info 
      227 
      228    def do_search_add_all(self): 
      229        # TODO: automatically add all texts listed in index 
      230        pass 
      231 
      232    def do_search(self, line=None): 
      233        import shakespeare.search 
      234        index = shakespeare.search.SearchIndex.default_index() 
      235        query = line.strip() 
      236        if not query: 
      237            print 'No search term supplied.' 
      238            return 1 
      239        matches = index.search(query) 
      240        print "%i results found." % matches.get_matches_estimated() 
      241        print "Results 1-%i:" % matches.size() 
      242 
      243        for m in matches: 
      244            print 
      245            print '%i: %i%% docid=%i' % (m.rank + 1, m.percent, m.docid) 
      246            print m.document.get_data() 
      247 
      248    def help_search(self, line=None): 
      249        info = 'Supply a query with which to search the search index.' 
      250        print info 
      251 
    197def main(): 252def main(): 
    198    import optparse 253    import optparse 
    199    usage = \ 254    usage = \ 
    200'''%prog [options] <command> 255'''%prog [options] <command> 
    201 256 
    202Run about or help for details.''' 257Run about or help for details.''' 
    203    parser = optparse.OptionParser(usage) 258    parser = optparse.OptionParser(usage) 
    204    parser.add_option('-v', '--verbose', dest='verbose', help='Be verbose', 259    parser.add_option('-v', '--verbose', dest='verbose', help='Be verbose', 
    205            action='store_true', default=False)  260            action='store_true', default=False)  
    206    options, args = parser.parse_args() 261    options, args = parser.parse_args() 
    207     262     
    208    if len(args) == 0: 263    if len(args) == 0: 
    209        parser.print_help() 264        parser.print_help() 
    210        return 1 265        return 1 
    211    else: 266    else: 
    212        cmd = ShakespeareAdmin() 267        cmd = ShakespeareAdmin(verbose=options.verbose) 
    213        args = ' '.join(args) 268        args = ' '.join(args) 
    214        args = args.replace('-','_') 269        args = args.replace('-','_') 
    215        cmd.onecmd(args) 270        cmd.onecmd(args) 
    216 271 
  • trunk/shakespeare/search.py

    Revision 165 Revision 169
    1#!/usr/bin/env python 1# Support for indexing and searching texts using xapian 
    2 2import os 
    3 3 
    4import xapian 4import xapian 
    5 5 
    6if len(sys.argv) < 2: 6class SearchIndex(object): 
    7    print >> sys.stderr, "Missing a search term" % sys.argv[0] 7    def __init__(self, index_dir): 
    8    sys.exit(1) 8        self.index_dir = index_dir 
    9 9 
    10try:  10     @classmethod 
    11    # Open the database for searching.  11     def config_index_dir(self): 
    12    database = xapian.Database('./index')  12         '''Get the search index directory specified in the config.''' 
       13         import shakespeare 
       14         conf = shakespeare.conf() 
       15         index_dir = conf['search_index_dir'] 
       16         return index_dir 
    13 17 
    14        # Start an enquire session.  18     @classmethod 
    15    enquire = xapian.Enquire(database)  19     def default_index(self): 
       20         '''Return a SearchIndex instance initialized with the path specified in 
       21         the configuration file. 
       22         ''' 
       23         index_dir = self.config_index_dir() 
       24         if not os.path.exists(index_dir): 
       25             os.makedirs(index_dir) 
       26         return SearchIndex(index_dir) 
    16 27 
    17        # Take the search argument and turn into a Xapian query  28     def add_item(self, fileobj): 
    18    query_string = sys.argv[1]  29         # TODO: remove this comment as no longer relevant (?) 
    19    for arg in sys.argv[2:]:  30         #create the folder for a writable db: alter path 
    20        query_string += ' '  31         document = xapian.WritableDatabase (self.index_dir, xapian.DB_CREATE_OR_OPEN) 
    21        query_string += arg  32         indexer = xapian.TermGenerator() 
       33         stemmer = xapian.Stem("english") 
       34         indexer.set_stemmer(stemmer) 
       35  
       36         para = '' 
       37         try: 
       38              for line in fileobj: 
       39                 line = line.strip() 
       40                 if line == '': 
       41                     if para != '': 
       42                         doc = xapian.Document() 
       43                         doc.set_data(para) 
       44  
       45                         indexer.set_document(doc) 
       46                         indexer.index_text(para) 
       47  
       48                         # Add the document to the database. 
       49                         document.add_document(doc) 
       50                         para = '' 
       51                 else: 
       52                     if para != '': 
       53                         para += ' ' 
       54                     para += line 
       55         except StopIteration: 
       56             # TODO: what is happening here? 
       57             pass 
       58             print Stopped 
       59  
       60     def search(self, query_string): 
       61         # Open the database for searching. 
       62         database = xapian.Database(self.index_dir) 
       63  
       64             # Start an enquire session. 
       65         enquire = xapian.Enquire(database) 
    22 66 
    23        # Parse the query string to produce a Xapian::Query object. 67        # Parse the query string to produce a Xapian::Query object. 
    24    qp = xapian.QueryParser() 68        qp = xapian.QueryParser() 
    25    stemmer = xapian.Stem("english") 69        stemmer = xapian.Stem("english") 
    26    qp.set_stemmer(stemmer) 70        qp.set_stemmer(stemmer) 
    27    qp.set_database(database) 71        qp.set_database(database) 
    28    qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME) 72        qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME) 
    29    query = qp.parse_query(query_string) 73        query = qp.parse_query(query_string) 
    30    print "Parsed query is: %s" % query.get_description() 74        print "Parsed query is: %s" % query.get_description() 
    31 75 
    32     # Find the top 10 results for the query.  76          # Find the top 10 results for the query. 
    33    enquire.set_query(query)  77         enquire.set_query(query) 
    34    matches = enquire.get_mset(0, 10)  78         matches = enquire.get_mset(0, 10) 
       79         return matches 
    35 80 
      81    @classmethod 
      82    def print_matches(self, matches): 
      83        # Display the results. 
      84        print "%i results found." % matches.get_matches_estimated() 
      85        print "Results 1-%i:" % matches.size() 
    36 86 
    37    # Display the results. 87        for m in matches: 
    38    print "%i results found." % matches.get_matches_estimated() 88            print "%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, m.document.get_data()) 
    39    print "Results 1-%i:" % matches.size()   
    40 89 
    41    for m in matches:   
    42        print "%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, m.document.get_data())   
    43   
    44except Exception, e:   
    45    print >> sys.stderr, "Exception: %s" % str(e)   
    46    sys.exit(1)   
    47