Changeset 169:30df1ae8d73a
- Timestamp:
- 01/06/09 18:47:18 (14 months ago)
- Author:
- rgrp
- Branch:
- default
- convert_revision:
- svn:0ead1229-0713-0410-96cd-f668dbfad531/trunk@247
- Message:
-
[shksprdata,moby][m]: update getdata/moby.py and start work on xsl transforms of moby texts.
- shakespeare/cache: minor
- minor refactoring
- support for not downloading to nested directories (e.g. abc.com/abc.txt goes on disk to abc.txt not abc.com/abc.txt).
- shksprdata/getdata/moby.py:
- was broken, so fixed it up (making it simpler at the same time)
- create metadata file at same time as downloading files (and rename files)
- start work on xsl transforms using iain's xslt file
- shakespeare/__init__.py: fix to documentation and bump version to 0.7
- MANIFEST.in: fix incorrect recursive includes (comment out in fact as not needed)
- Files:
-
Legend:
- Unmodified
- Added
- Removed
-
|
r103
|
r169
|
|
| 1 | 1 | recursive-include shakespeare/public * |
| 2 | 2 | recursive-include shakespeare/templates * |
| 3 | | recursive-include shksprdata * |
| | 3 | # seem to be automatically included ... |
| | 4 | # recursive-include shksprdata *.txt |
| | 5 | # recursive-include shksprdata *.xml |
| | 6 | # recursive-include miltondata *.txt |
| | 7 | # recursive-include miltondata *.xml |
-
|
r153
|
r169
|
|
| 4 | 4 | |
| 5 | 5 | The Open Shakespeare package provides a full open set of shakespeare's works |
| 6 | | (often in multiple versions) along with ancillary material, a variety of tools |
| 7 | | and a python API. |
| 8 | | |
| 9 | | Specifically in addition to the works themselves (often in multiple versions) |
| 10 | | there is an introduction, a chronology, explanatory notes, a concordance and |
| 11 | | search facilities. |
| | 6 | (often in multiple versions) along with ancillary material, a variety of tools, |
| | 7 | a python API and a web interface that provides access to many (but not all) of |
| | 8 | these facilities from the comfort of your web browser (see |
| | 9 | http://www.openshakespeare.org/ for a demo). |
| 12 | 10 | |
| 13 | 11 | All material is open source/open knowledge so that anyone can use, redistribute |
| … |
… |
|
| 89 | 87 | |
| 90 | 88 | 2. To load the author packages, change into the miltondata or shksprdata directories |
| 91 | | and run the command load-milton (or load-shakespeare) -c <path to your development.ini> |
| 92 | | This will load the metadata text into the database. |
| | 89 | and run the command load-milton (or load-shakespeare) -c <path to your |
| | 90 | development.ini> This will load the metadata text into the database. |
| 93 | 91 | |
| 94 | 92 | 3. You can start a web server to provide a easy-to-use web interface to the |
| 95 | | shakespeare material and facilities by doing:: |
| | 93 | shakespeare material and facilities by doing:: |
| 96 | 94 | |
| 97 | | $ paster serve {your-config.ini} |
| | 95 | $ paster serve {your-config.ini} |
| 98 | 96 | |
| 99 | | NB: {your-config.ini} should be replaced with the name of the config file you |
| | 97 | NB: {your-config.ini} should be replaced with the name of the config file you |
| 100 | 98 | created earlier. |
| 101 | 99 | |
| … |
… |
|
| 107 | 105 | -------- |
| 108 | 106 | |
| 109 | | Follow the basic steps above put with an ini file named: development.ini |
| | 107 | Follow the basic steps above but with an ini file named: development.ini |
| 110 | 108 | |
| 111 | 109 | NB: you'll probably want to change log levels to debug. |
| … |
… |
|
| 121 | 119 | $ nosetests shakespeare |
| 122 | 120 | ''' |
| 123 | | __version__ = '0.6a' |
| | 121 | __version__ = '0.7a' |
| 124 | 122 | __application_name__ = 'shakespeare' |
| 125 | 123 | |
-
|
r98
|
r169
|
|
| 1 | 1 | import os |
| | 2 | import urlparse |
| 2 | 3 | import urllib |
| 3 | | |
| 4 | | import shakespeare |
| 5 | | conf = shakespeare.conf() |
| 6 | 4 | |
| 7 | 5 | class Cache(object): |
| … |
… |
|
| 9 | 7 | """ |
| 10 | 8 | |
| 11 | | def __init__(self, cache_path): |
| | 9 | def __init__(self, cache_path, fullpath=True): |
| | 10 | ''' |
| | 11 | @param fullpath: save to local path corresponding to full url path |
| | 12 | (creating directories as necessary) when retrieving. |
| | 13 | ''' |
| 12 | 14 | self.cache_path = cache_path |
| | 15 | self.fullpath = fullpath |
| 13 | 16 | |
| 14 | 17 | def path(self, remote_url, version=''): |
| … |
… |
|
| 16 | 19 | @type: string giving version of text (''|'cleaned') |
| 17 | 20 | """ |
| 18 | | protocolEnd = remote_url.index(':') + 3 # add 3 for :// |
| 19 | | path = remote_url[protocolEnd:] |
| 20 | | base, name = os.path.split(path) |
| | 21 | urlparts = urlparse.urlparse(remote_url) |
| | 22 | base = urlparts[1] |
| | 23 | pathparts = urlparts[2].split('/') |
| | 24 | if len(pathparts) > 1: |
| | 25 | base = os.path.join(base, *pathparts[:-1]) |
| | 26 | name = pathparts[-1] |
| 21 | 27 | name = version + name |
| 22 | | offset = os.path.join(base, name) |
| 23 | | localPath = self.path_from_offset(offset) |
| 24 | | return localPath |
| | 28 | if self.fullpath: |
| | 29 | offset = os.path.join(base, name) |
| | 30 | else: |
| | 31 | offset = name |
| | 32 | local_path = self.path_from_offset(offset) |
| | 33 | return local_path |
| | 34 | |
| | 35 | def path_from_offset(self, offset): |
| | 36 | "Get full path of file in cache given by offset." |
| | 37 | return os.path.join(self.cache_path, offset) |
| 25 | 38 | |
| 26 | 39 | def download_url(self, url, overwrite=False): |
| … |
… |
|
| 42 | 55 | urllib.urlretrieve(url, localPath) |
| 43 | 56 | |
| 44 | | def path_from_offset(self, offset): |
| 45 | | "Get full path of file in cache given by offset." |
| 46 | | return os.path.join(self.cache_path, offset) |
| 47 | 57 | |
| | 58 | try: |
| | 59 | import shakespeare |
| | 60 | conf = shakespeare.conf() |
| 48 | 61 | |
| 49 | | default_path = shakespeare.conf()['cachedir'] |
| 50 | | default = Cache(default_path) |
| | 62 | default_path = shakespeare.conf()['cachedir'] |
| | 63 | default = Cache(default_path) |
| | 64 | except: |
| | 65 | pass |
| 51 | 66 | |
-
|
r141
|
r169
|
|
| 144 | 144 | self.cache = cache |
| 145 | 145 | self._index = works |
| | 146 | |
| | 147 | def vprint(self, info, force=True): |
| | 148 | if self.verbose or force: |
| | 149 | print(info) |
| 146 | 150 | |
| 147 | 151 | def _filter_index(self, line): |
-
|
r134
|
r169
|
|
| 21 | 21 | exp = os.path.join(self.cache_path, self.url[7:]) |
| 22 | 22 | out = self.cache.path(self.url) |
| 23 | | assert out == exp |
| | 23 | assert out == exp, (out, exp) |
| 24 | 24 | |
| 25 | 25 | def test_path_2(self): |
| … |
… |
|
| 29 | 29 | assert exp == out |
| 30 | 30 | |
| | 31 | def test_path_without_fullpath(self): |
| | 32 | cache2 = shakespeare.cache.Cache('', fullpath=False) |
| | 33 | out = cache2.path(self.url) |
| | 34 | assert out == 'GUTINDEX.ALL' |
| | 35 | |
| 31 | 36 | def test_download_url(self): |
| 32 | 37 | exp = os.path.join(self.cache_path, self.url2[7:]) |