Changeset 194:76629f68bc4a
- Timestamp:
- 09/14/08 18:39:10 (2 years ago)
- Author:
- rgrp
- Branch:
- default
- convert_revision:
- svn:10edda23-d834-0410-9182-b00384516d49/trunk@206
- Message:
-
[factlet/new][l]: implement factlet creation from a Wikipedia URL in the WUI.
- dbpedia:
- put in tests for to_factlet and fix the issues found with it.
- fix up verbose support in Describe
- factlet: support for factlet creation from a Wikipedia URL + test.
- Location:
- microfacts
- Files:
-
Legend:
- Unmodified
- Added
- Removed
-
|
r190
|
r194
|
|
| | 1 | import logging |
| 1 | 2 | import simplejson as sj |
| 2 | 3 | import genshi |
| | 4 | |
| | 5 | logging.getLogger(__name__) |
| 3 | 6 | |
| 4 | 7 | from microfacts.lib.base import * |
| 5 | 8 | from microfacts.modes import * |
| 6 | 9 | import microfacts.lib.json |
| | 10 | |
| | 11 | # not everyone will have the libraries installed for working with dbpedia |
| | 12 | dbpedia_enabled = True |
| | 13 | try: |
| | 14 | from microfacts.getdata.dbpedia import * |
| | 15 | except: |
| | 16 | dbpedia_enabled = False |
| 7 | 17 | |
| 8 | 18 | class FactletController(BaseController): |
| … |
… |
|
| 34 | 44 | |
| 35 | 45 | def new(self): |
| 36 | | return render('factlet/new') |
| | 46 | c.dbpedia_enabled = dbpedia_enabled |
| | 47 | if not 'commit' in request.params: # no form submitted |
| | 48 | return render('factlet/new') |
| 37 | 49 | |
| 38 | | def create(self): |
| 39 | | if 'title' in request.params: |
| | 50 | title = request.params.getone('title').strip() |
| | 51 | url = request.params.getone('url').strip() |
| | 52 | if title: |
| | 53 | logger.debug('Creating new factlet with title: %s' % title) |
| 40 | 54 | registry_path = '/factlet' |
| 41 | 55 | entity_data = { |
| … |
… |
|
| 50 | 64 | h.redirect_to(controller='factlet', action='update', |
| 51 | 65 | id=c.factlet.id) |
| 52 | | else: |
| | 66 | else: # this should really tell the user what was wrong |
| 53 | 67 | abort(self.mode.response_code) |
| | 68 | elif url: |
| | 69 | logger.debug('Creating new factlet from url: %s' % url) |
| | 70 | # TODO: check dbpedia_enabled is True? |
| | 71 | d = microfacts.getdata.dbpedia.Describe() |
| | 72 | d.execute(url) |
| | 73 | try: |
| | 74 | c.factlet = d.to_factlet() |
| | 75 | except Exception, inst: |
| | 76 | c.error = str(inst) |
| | 77 | return render('factlet/new') |
| | 78 | h.redirect_to(controller='factlet', action='update', |
| | 79 | id=c.factlet.id) |
| 54 | 80 | else: |
| | 81 | # Error? |
| | 82 | # c.error = 'No title or url supplied' |
| 55 | 83 | return render('factlet/new') |
| | 84 | |
| 56 | 85 | |
| 57 | 86 | def read_core(self, id): |
-
|
r192
|
r194
|
|
| 75 | 75 | class Describe: |
| 76 | 76 | |
| 77 | | def __init__(self): |
| | 77 | def __init__(self, verbose=False): |
| 78 | 78 | self.results = [] |
| 79 | | self.verbose = False |
| | 79 | self.verbose = verbose |
| 80 | 80 | self.language = 'en' |
| 81 | 81 | |
| 82 | 82 | def execute(self, uri): |
| | 83 | if 'wikipedia.org' in uri: |
| | 84 | uri = self._convert_wikipedia_url(uri) |
| 83 | 85 | self.uri = uri |
| 84 | 86 | query2 = PREFIXES + ''' |
| … |
… |
|
| 93 | 95 | # print self.results.all_nodes() |
| 94 | 96 | |
| 95 | | def to_str(self, verbose=False): |
| | 97 | def _convert_wikipedia_url(self, url): |
| | 98 | import urlparse |
| | 99 | path = urlparse.urlparse(url)[2] |
| | 100 | name = path.split('/')[-1] |
| | 101 | dbpedia_uri = 'http://dbpedia.org/resource/' + name |
| | 102 | return dbpedia_uri |
| | 103 | |
| | 104 | def to_str(self): |
| 96 | 105 | output = self.uri + '\n\n' |
| 97 | | if verbose: |
| | 106 | if self.verbose: |
| 98 | 107 | for s,p,o in self.results: |
| 99 | 108 | output += u'%s, %s, %s\n' % (s,p,o) |
| … |
… |
|
| 109 | 118 | kwds = { 'title' : None, |
| 110 | 119 | 'description' : None, |
| 111 | | 'start' : None, |
| 112 | | 'end' : None, |
| | 120 | 'start' : '', |
| | 121 | 'end' : '', |
| 113 | 122 | 'source': None, |
| 114 | 123 | 'place': [], |
| 115 | 124 | 'long': None, |
| 116 | 125 | 'lat': None, |
| 117 | | 'location' : {'type': 'Point', 'coordinates': [None, None]}, |
| 118 | | 'image': [], |
| 119 | | 'license': 'GFDL' |
| | 126 | 'image_urls': [], |
| | 127 | 'license': u'GFDL' |
| 120 | 128 | } |
| 121 | 129 | for s,p,o in self.results: |
| … |
… |
|
| 159 | 167 | kwds['source'] = unicode(o) |
| 160 | 168 | elif str(p) == 'http://xmlns.com/foaf/0.1/img': |
| 161 | | kwds['image'] = kwds['image'] + [unicode(o)] |
| | 169 | kwds['image_urls'] = kwds['image_urls'] + [unicode(o)] |
| 162 | 170 | |
| 163 | 171 | # get lat/long indirectly ... |
| … |
… |
|
| 177 | 185 | # once we have one set we don't need any more ... |
| 178 | 186 | break |
| 179 | | kwds['location']['coordinates'][0] = kwds['long'] |
| 180 | | kwds['location']['coordinates'][1] = kwds['lat'] |
| 181 | 187 | return kwds |
| 182 | 188 | |
| … |
… |
|
| 214 | 220 | raise ValueError('%s is not a useable date object' % o) |
| 215 | 221 | except Exception, inst: |
| 216 | | print 'Problem with extracting date from: %s' % o |
| 217 | | try: |
| 218 | | print inst |
| 219 | | except: |
| 220 | | pass |
| | 222 | if self.verbose: |
| | 223 | print 'Problem with extracting date from: %s' % o |
| | 224 | try: |
| | 225 | print inst |
| | 226 | except: |
| | 227 | pass |
| 221 | 228 | return dates |
| 222 | 229 | |
| … |
… |
|
| 228 | 235 | from microfacts.lib.json import FactletConverter |
| 229 | 236 | conv = FactletConverter() |
| 230 | | conv.to_domain_object(kwds) |
| 231 | | model.Session.flush() |
| | 237 | # convert kwds dates back to strings ... |
| | 238 | kwds['start'] = str(kwds['start']) |
| | 239 | kwds['end'] = str(kwds['end']) |
| | 240 | # convert lists to non-lists (very crudely) |
| | 241 | if kwds['image_urls']: |
| | 242 | kwds['image'] = kwds['image_urls'][0] |
| | 243 | if kwds['long']: |
| | 244 | kwds['location'] = { 'type': 'Point', coordinates:None } |
| | 245 | kwds['location']['coordinates'] = [ kwds['long'], kwds['lat'] ] |
| | 246 | fct = conv.to_domain_object(kwds) |
| | 247 | return fct |
| 232 | 248 | |
| 233 | 249 | |
| 234 | 250 | def describe(uri, verbose=False): |
| 235 | | d = Describe() |
| | 251 | d = Describe(verbose) |
| 236 | 252 | d.execute(uri) |
| 237 | | return d.to_str(verbose=verbose) |
| | 253 | return d.to_str() |
| 238 | 254 | |
| 239 | 255 | |
-
|
r192
|
r194
|
|
| 111 | 111 | describe <uri> |
| 112 | 112 | search <category-name> |
| 113 | | wp <wikipedia-url> |
| 114 | 113 | |
| 115 | 114 | Examples: |
| 116 | 115 | |
| 117 | 116 | describe http://dbpedia.org/resource/Admiralty_Islands_campaign |
| | 117 | describe http://en.wikipedia.org/wiki/Napoleon_I_of_France |
| 118 | 118 | search Category:Battles_and_operations_of_World_War_II |
| 119 | | wp http://en.wikipedia.org/wiki/Napoleon_I_of_France |
| 120 | 119 | ''' |
| 121 | 120 | summary = __doc__.split('\n')[0] |
| … |
… |
|
| 141 | 140 | category = self.args[1] |
| 142 | 141 | result = dbp.category_search(category) |
| 143 | | elif cmd == 'wp': |
| 144 | | import urlparse |
| 145 | | url = self.args[1] |
| 146 | | path = urlparse.urlparse(url)[2] |
| 147 | | name = path.split('/')[-1] |
| 148 | | dbpedia_uri = 'http://dbpedia.org/resource/' + name |
| 149 | | result = dbp.describe(dbpedia_uri, verbose=self.verbose) |
| 150 | 142 | else: |
| 151 | 143 | msg = 'Command %s not recognized' % cmd |
-
|
r193
|
r194
|
|
| 186 | 186 | |
| 187 | 187 | mapper(User, user_table, properties={ |
| 188 | | 'factlets':orm.relation(Factlet, secondary=user_2_factlet, backref='owners'), |
| 189 | | 'threads':orm.relation(Thread, secondary=user_2_thread, backref='owners'), |
| | 188 | 'factlets':orm.relation( |
| | 189 | Factlet, |
| | 190 | secondary=user_2_factlet, |
| | 191 | backref='owners'), |
| | 192 | 'threads':orm.relation(Thread, |
| | 193 | secondary=user_2_thread, |
| | 194 | backref='owners'), |
| 190 | 195 | }) |
-
|
r190
|
r194
|
|
| 23 | 23 | <li>${h.link_to('Home', h.url_for(controller='factlet', action='index', id=None))}</li> |
| 24 | 24 | <li>${h.link_to('List', h.url_for(controller='factlet', action='list', id=None))}</li> |
| 25 | | <li>${h.link_to('New', h.url_for(controller='factlet', action='create', id=None))}</li> |
| | 25 | <li>${h.link_to('Create', h.url_for(controller='factlet', action='new', id=None))}</li> |
| 26 | 26 | </ul> |
| 27 | 27 | </aside> |
-
|
r51
|
r194
|
|
| 1 | | <!DOCTYPE html> |
| 2 | 1 | <html |
| 3 | 2 | xmlns="http://www.w3.org/1999/xhtml" |
| … |
… |
|
| 10 | 9 | |
| 11 | 10 | <head> |
| 12 | | <title>New</title> |
| | 11 | <title>Create</title> |
| 13 | 12 | </head> |
| 14 | 13 | |
| 15 | 14 | <body> |
| 16 | | <h2>Register New Factlet</h2> |
| 17 | | <p>Please choose a title for the factlet.</p> |
| 18 | | <form action="${h.url_for(controller='factlet', action='create')}" method="post"> |
| | 15 | <h2>Create a New Factlet</h2> |
| | 16 | <p py:if="c.error"><strong>There was an error: ${c.error}</strong></p> |
| | 17 | <form action="" method="post"> |
| | 18 | <p>Please choose a title for the factlet.</p> |
| 19 | 19 | <label for="title">Title:</label> |
| 20 | 20 | ${XML(h.text_field('title', size=30))} |
| 21 | 21 | <br /><br /> |
| | 22 | |
| | 23 | <div py:strip="True" py:if="c.dbpedia_enabled"> |
| | 24 | |
| | 25 | <p><strong>OR</strong> enter a Wikipedia (or DBPedia) url:</p> |
| | 26 | <label for="url">Url:</label> |
| | 27 | ${XML(h.text_field('url', size=30))} |
| | 28 | |
| | 29 | <p class="desc">The factlet will be generated from the information in that |
| | 30 | article.</p> |
| | 31 | </div> |
| | 32 | |
| 22 | 33 | ${XML(h.submit('Create new factlet'))} |
| 23 | 34 | </form> |
-
|
r190
|
r194
|
|
| 166 | 166 | form = response.forms[0] |
| 167 | 167 | form['title'] = self.title |
| 168 | | response = form.submit(status=[302]) |
| | 168 | # even though there is only one button, if you do not specify its name then |
| | 169 | # 'commit' is not in the post params (which messes up the controller function) |
| | 170 | response = form.submit('commit', status=[302]) |
| 169 | 171 | response = response.follow() |
| 170 | 172 | assert 'Factlets - Edit' in response, response |
| … |
… |
|
| 172 | 174 | assert self.ft |
| 173 | 175 | |
| | 176 | import microfacts.controllers.factlet |
| | 177 | class TestFactletCreateViaWikipedia(TestController): |
| | 178 | |
| | 179 | # nose attributes |
| | 180 | __test__ = microfacts.controllers.factlet.dbpedia_enabled |
| | 181 | external = True |
| | 182 | tags = [ 'dbpedia' ] |
| | 183 | |
| | 184 | title = u'Philip II of Spain' |
| | 185 | url = u'http://en.wikipedia.org/wiki/Philip_II_of_Spain' |
| | 186 | |
| | 187 | def tearDown(self): |
| | 188 | ft = model.Factlet.query.filter_by(source=self.url).one() |
| | 189 | ft.delete() |
| | 190 | model.Session.flush() |
| | 191 | model.Session.remove() |
| | 192 | |
| | 193 | def test_create(self): |
| | 194 | path = url_for(controller='factlet', action='new') |
| | 195 | response = self.app.get(path) |
| | 196 | assert 'enter a Wikipedia (or DBPedia resouce) url:' |
| | 197 | form = response.forms[0] |
| | 198 | form['url'] = self.url |
| | 199 | response = form.submit('commit', status=[302]) |
| | 200 | response = response.follow() |
| | 201 | assert 'Factlets - Edit' in response, response |
| | 202 | self.ft = model.Factlet.query.filter_by(source=self.url).one() |
| | 203 | assert self.ft.title == self.title |
| | 204 | # May 21, 1527 (1527-05-21) |
| | 205 | assert self.ft.start.year == 1527 |
| | 206 | |
-
|
r192
|
r194
|
|
| 1 | 1 | # not everyone will have the libraries installed for working with dbpedia |
| | 2 | dotest = True |
| 2 | 3 | try: |
| 3 | 4 | from microfacts.getdata.dbpedia import * |
| 4 | 5 | except: |
| 5 | | pass |
| | 6 | dotest = False |
| 6 | 7 | |
| 7 | | class TestDescribe: |
| 8 | | __test__ = False |
| | 8 | class DbpediaBase(object): |
| | 9 | __test__ = dotest |
| | 10 | external = True |
| | 11 | tags = [ 'dbpedia' ] |
| | 12 | |
| | 13 | class TestDescribe(DbpediaBase): |
| 9 | 14 | |
| 10 | 15 | def test_extract_1(self): |
| … |
… |
|
| 30 | 35 | assert kwds['title'] == u'Invasion of Normandy' |
| 31 | 36 | |
| 32 | | class TestDescribe2(object): |
| 33 | | __test__ = False |
| | 37 | class TestDescribe2(DbpediaBase): |
| 34 | 38 | |
| 35 | 39 | @classmethod |
| 36 | 40 | def setup_class(self): |
| 37 | 41 | uri3 = 'http://dbpedia.org/resource/Battle_of_Austerlitz' |
| 38 | | d = Describe() |
| 39 | | d.execute(uri3) |
| 40 | | self.kwds = d.extract(recurse_for_location=True) |
| | 42 | self.d = Describe() |
| | 43 | self.d.execute(uri3) |
| | 44 | self.kwds = self.d.extract(recurse_for_location=True) |
| | 45 | self.fct = None |
| | 46 | self.start = datetime.datetime(1805, 12, 2) |
| | 47 | |
| | 48 | @classmethod |
| | 49 | def teardown_class(self): |
| | 50 | import microfacts.model as model |
| | 51 | if self.fct: |
| | 52 | self.fct.delete() |
| | 53 | model.Session.flush() |
| | 54 | model.Session.remove() |
| 41 | 55 | |
| 42 | 56 | def test_date(self): |
| 43 | | assert self.kwds['start'] == datetime.datetime(1805, 12, 2) |
| | 57 | assert self.kwds['start'] == self.start |
| 44 | 58 | |
| 45 | 59 | def test_source(self): |
| … |
… |
|
| 50 | 64 | assert self.kwds['long'] == 16.76361083984375 |
| 51 | 65 | |
| | 66 | def test_to_factlet(self): |
| | 67 | self.fct = self.d.to_factlet() |
| | 68 | assert self.fct.id |
| | 69 | long = round(self.fct.location.x,1) |
| | 70 | assert long == 16.8, long |
| | 71 | assert self.fct.start == self.start |
| | 72 | |
| | 73 | |
| | 74 | class TestDescribeWikipedia(DbpediaBase): |
| | 75 | |
| | 76 | uri = 'http://en.wikipedia.org/wiki/Battle_of_Austerlitz' |
| | 77 | |
| | 78 | def test__convert_wikipedia_url(self): |
| | 79 | d = Describe() |
| | 80 | out = d._convert_wikipedia_url(self.uri) |
| | 81 | assert out == 'http://dbpedia.org/resource/Battle_of_Austerlitz', out |
| | 82 | |
| | 83 | def test_1(self): |
| | 84 | d = Describe() |
| | 85 | d.execute(self.uri) |
| | 86 | self.kwds = d.extract() |
| | 87 | assert self.kwds['title'] == 'Battle of Austerlitz' |
| | 88 | assert self.kwds['source'] == self.uri |
| | 89 | |