Changeset 169:30df1ae8d73a

Show
Ignore:
Timestamp:
01/06/09 18:47:18 (14 months ago)
Author:
rgrp
Branch:
default
convert_revision:
svn:0ead1229-0713-0410-96cd-f668dbfad531/trunk@247
Message:

[shksprdata,moby][m]: update getdata/moby.py and start work on xsl transforms of moby texts.

  • shakespeare/cache: minor
    • minor refactoring
    • optional support for saving downloads flat rather than in nested directories (e.g. abc.com/abc.txt is saved on disk as abc.txt, not abc.com/abc.txt).
  • shksprdata/getdata/moby.py:
    • was broken so fix up (making simpler at same time)
    • create metadata file at same time as downloading files (and rename files)
    • start work on xsl transforms using iain's xslt file
  • shakespeare/__init__.py: fix documentation and bump version to 0.7a
  • MANIFEST.in: fix incorrect recursive includes (comment out in fact as not needed)
Files:
5 modified

Legend:

Unmodified
Added
Removed
  • MANIFEST.in

    r103 r169  
    11recursive-include shakespeare/public * 
    22recursive-include shakespeare/templates * 
    3 recursive-include shksprdata * 
     3# seem to be automatically included ... 
     4# recursive-include shksprdata *.txt 
     5# recursive-include shksprdata *.xml 
     6# recursive-include miltondata *.txt 
     7# recursive-include miltondata *.xml 
  • shakespeare/__init__.py

    r153 r169  
    44 
    55The Open Shakespeare package provides a full open set of shakespeare's works 
    6 (often in multiple versions) along with ancillary material, a variety of tools 
    7 and a python API. 
    8  
    9 Specifically in addition to the works themselves (often in multiple versions) 
    10 there is an introduction, a chronology, explanatory notes, a concordance and 
    11 search facilities. 
     6(often in multiple versions) along with ancillary material, a variety of tools, 
     7a python API and a web interface that provides access to many (but not all) of 
     8these facilities from the comfort of your web browser (see 
     9http://www.openshakespeare.org/ for a demo). 
    1210 
    1311All material is open source/open knowledge so that anyone can use, redistribute 
     
    8987 
    90882. To load the author packages, change into the miltondata or shksprdata directories 
    91 and run the command load-milton (or load-shakespeare) -c <path to your development.ini> 
    92 This will load the metadata text into the database.  
     89   and run the command load-milton (or load-shakespeare) -c <path to your 
     90   development.ini> This will load the metadata text into the database.  
    9391 
    94923. You can start a web server to provide a easy-to-use web interface to the 
    95 shakespeare material and facilities by doing:: 
     93   shakespeare material and facilities by doing:: 
    9694 
    97     $ paster serve {your-config.ini} 
     95        $ paster serve {your-config.ini} 
    9896 
    99 NB: {your-config.ini} should be replaced with the name of the config file you 
     97   NB: {your-config.ini} should be replaced with the name of the config file you 
    10098created earlier. 
    10199 
     
    107105-------- 
    108106 
    109 Follow the basic steps above put with an ini file named: development.ini 
     107Follow the basic steps above but with an ini file named: development.ini 
    110108 
    111109NB: you'll probably want to change log levels to debug. 
     
    121119    $ nosetests shakespeare 
    122120''' 
    123 __version__ = '0.6a' 
     121__version__ = '0.7a' 
    124122__application_name__ = 'shakespeare' 
    125123 
  • shakespeare/cache.py

    r98 r169  
    11import os 
     2import urlparse 
    23import urllib 
    3  
    4 import shakespeare 
    5 conf = shakespeare.conf() 
    64 
    75class Cache(object): 
     
    97    """ 
    108 
    11     def __init__(self, cache_path): 
     9    def __init__(self, cache_path, fullpath=True): 
     10        ''' 
     11        @param fullpath: save to local path corresponding to full url path 
     12            (creating directories as necessary) when retrieving. 
     13        ''' 
    1214        self.cache_path = cache_path 
     15        self.fullpath = fullpath 
    1316 
    1417    def path(self, remote_url, version=''): 
     
    1619        @type: string giving version of text (''|'cleaned') 
    1720        """ 
    18         protocolEnd = remote_url.index(':') + 3  # add 3 for :// 
    19         path = remote_url[protocolEnd:] 
    20         base, name = os.path.split(path) 
     21        urlparts = urlparse.urlparse(remote_url) 
     22        base = urlparts[1] 
     23        pathparts = urlparts[2].split('/') 
     24        if len(pathparts) > 1: 
     25            base = os.path.join(base, *pathparts[:-1]) 
     26        name = pathparts[-1] 
    2127        name = version + name 
    22         offset = os.path.join(base, name) 
    23         localPath = self.path_from_offset(offset) 
    24         return localPath 
     28        if self.fullpath: 
     29            offset = os.path.join(base, name) 
     30        else: 
     31            offset = name 
     32        local_path = self.path_from_offset(offset) 
     33        return local_path 
     34 
     35    def path_from_offset(self, offset): 
     36        "Get full path of file in cache given by offset." 
     37        return os.path.join(self.cache_path, offset) 
    2538 
    2639    def download_url(self, url, overwrite=False): 
     
    4255            urllib.urlretrieve(url, localPath) 
    4356 
    44     def path_from_offset(self, offset): 
    45         "Get full path of file in cache given by offset." 
    46         return os.path.join(self.cache_path, offset) 
    4757 
     58try: 
     59    import shakespeare 
     60    conf = shakespeare.conf() 
    4861 
    49 default_path = shakespeare.conf()['cachedir'] 
    50 default = Cache(default_path) 
     62    default_path = shakespeare.conf()['cachedir'] 
     63    default = Cache(default_path) 
     64except: 
     65    pass 
    5166 
  • shakespeare/gutenberg.py

    r141 r169  
    144144        self.cache = cache 
    145145        self._index = works 
     146 
     147    def vprint(self, info, force=True): 
     148        if self.verbose or force: 
     149            print(info) 
    146150      
    147151    def _filter_index(self, line): 
  • shakespeare/tests/test_cache.py

    r134 r169  
    2121        exp = os.path.join(self.cache_path, self.url[7:]) 
    2222        out = self.cache.path(self.url) 
    23         assert out == exp 
     23        assert out == exp, (out, exp) 
    2424 
    2525    def test_path_2(self): 
     
    2929        assert exp == out 
    3030 
     31    def test_path_without_fullpath(self): 
     32        cache2 = shakespeare.cache.Cache('', fullpath=False) 
     33        out = cache2.path(self.url) 
     34        assert out == 'GUTINDEX.ALL' 
     35 
    3136    def test_download_url(self): 
    3237        exp = os.path.join(self.cache_path, self.url2[7:])