4from sqlalchemy import sql
6import swiss.tabular as T
9import pdw.model as model
16 path = os.path.join(OUTIDIR, '%s_%s' % (PREFIX, offset))
19def num_works_with_items():
# Builds the query returned by model.work_2_item.count(), executes it, and
# stores the first column of the first result row in `out`.
20 q = model.work_2_item.count()
21 out = q.execute().fetchall()[0][0]
24def death_date_in_future():
# Starts from model.Person.query, applies conditions on
# death_date_ordered > 2015 and birth_date_ordered > 2000, and prints a
# '## Total: ...' line.
# NOTE(review): the only `ptab` assignment in sight is commented out, yet
# `ptab` is used in the conditions below -- confirm it is bound before use.
25 # ptab = model.person_table
27 q = model.Person.query
29 ptab.death_date_ordered>2015,
30 ptab.birth_date_ordered>2000,
33 print '## Total: %s' % total
41 q = model.Person.query
42 q = q.filter(ptab.death_date_ordered>1930).\
43 filter(ptab.death_date_ordered<1938)
45 print '## Total: %s' % total
51def _entry(data, total):
# Formats `data` followed by its share of `total` as a whole-number
# percentage in parentheses, e.g. data=5, total=20 gives '5 (25%)'.
# float() forces true division; %i then truncates the percentage.
# NOTE(review): raises ZeroDivisionError when total is 0 -- confirm callers
# never pass a zero total.
52 out ='%s (%i%%)' % (data, 100*float(data)/total)
56 stats = pdw.stats.Stats.default()
57 pdrel = stats.pdrel_for_item_by_decade()
59 td.header = [ u'Pub. Date', u'Total', u'No Author', u'Any Date', 'Death Date' ]
60 for years in sorted(pdrel.keys()):
64 _entry(data['no_person'], total),
65 _entry(data['any_date'], total),
66 _entry(data['death_date'], total),
69 writer = T.LatexWriter()
70 # writer = T.HtmlWriter(pretty_print=True)
71 out = writer.write_str(td)
75 stats = pdw.stats.Stats.default()
76 directpd = stats.fast_pd_for_item_by_decade()
77 pdrel = stats.pdrel_for_item_by_decade()
79 td.header = [ u'Pub. Date', u'Total', u'PD', u'Not PD', '?',
82 for years in sorted(directpd.keys()):
83 data = directpd[years]
85 pd, not_pd = data['pd'], data['not_pd']
88 _entry(not_pd, total),
89 _entry(data['undetermined'], total),
90 '%i%%' % (100 * float(pd)/(pd+not_pd)),
91 '%i%%' % (100 * float(pd) / pdrel[years]['any_date'])
94 writer = T.LatexWriter()
95 # writer = T.HtmlWriter(pretty_print=True)
96 out = writer.write_str(td)
99def work_item_counts():
# Runs a raw SQL statement selecting work_id and count(*) from work_2_item
# (presumably grouped by work_id -- TODO confirm) on the engine bound to
# model.metadata, and fetches every result row into `out`.
100 sqltext = '''SELECT work_id, count(*) from work_2_item
104 engine = model.metadata.bind
105 out = engine.execute(sql.text(sqltext)).fetchall()
108'''Can we afford to cache?
110def person_work_and_item_counts():
# Ranks people by the number of item_2_person rows linked to them and
# reports the result as a table and a set of plots:
#   * runs a raw SQL count grouped by person, ordered by count descending;
#   * builds rows of [rank, count, name] from the first 200 results, keeps
#     the first 50, and prints them as an HTML table;
#   * saves rank-vs-count line plots (plain, semilog, and log variants) and
#     two histograms of the log counts as PNG files named through _fn().
# NOTE(review): `bad_names` is built, but the list-comprehension filter that
# would use it is commented out, so it has no effect on the table.
# NOTE(review): `S` and `cutoff` are used below without a visible
# assignment -- confirm where they are bound.
112 sqltext = '''SELECT count(*), person.name, person.id FROM item_2_person
113 JOIN person ON item_2_person.person_id = person.id
114 GROUP BY person.id, person.name
115 ORDER BY count(*) DESC
117 engine = model.metadata.bind
118 out = engine.execute(sql.text(sqltext)).fetchall()
119 td = T.TabularData(header=['Rank', 'No. of Items', 'Name'])
120 bad_names = [ 'Great Britain. Parliament.',
123 'Church of England.',
124 'England and Wales. Sovereign (1660-1685 : Charles II)',
128 td.data = [ list(x[:2]) for x in out[:200] ] # if x[1] not in bad_names ]
129 td.data = [ [ii+1] + x for ii,x in enumerate(td.data) ]
130 td.data = td.data[:50]
131 # writer = T.TxtWriter()
132 writer = T.HtmlWriter(pretty_print=True)
133 print writer.write_str(td)
135 counts = [ x[0] for x in out ]
136 import matplotlib.pyplot as plt
138 countslog = S.log(counts)
139 params = { 'color': 'red', 'alpha': 0.7 }
140 lparams = { 'color': 'black', 'linewidth': 2 }
141 # plt.bar(range(cutoff), counts[:cutoff], **params)
142 plt.plot(range(cutoff), counts[:cutoff], 'k-', **lparams)
145 plt.savefig(_fn('person-item-by-rank.png'))
148 plt.semilogy(range(cutoff), counts[:cutoff], **lparams)
151 plt.savefig(_fn('person-item-by-rank-logy.png'))
154 plt.semilogy(range(cutoff), countslog[:cutoff], **lparams)
155 plt.ylabel('Log Counts (log)')
157 plt.savefig(_fn('person-item-by-rank-2logy.png'))
160 plt.semilogy(S.log(list(range(1,cutoff+1))), counts[:cutoff], **lparams)
161 plt.ylabel('Counts (log)')
162 plt.xlabel('Log Rank')
163 plt.savefig(_fn('person-item-by-rank-logxlogy.png'))
166 params = { 'fc': 'red', 'alpha': 0.7 }
167 plt.hist(countslog, bins=100, **params)
168 plt.xlabel('Log Counts')
170 plt.savefig(_fn('person-item-hist-logx.png'))
173 plt.hist(countslog, bins=100, log=True, **params)
174 plt.xlabel('Log Counts')
175 plt.ylabel('Freq (Log)')
176 plt.savefig(_fn('person-item-hist-logxlogy.png'))
def person_item_counts(name):
    """Count items per person for people whose name contains ``name``.

    Joins ``item_2_person`` to ``person``, keeps the rows whose
    ``person.name`` matches ``%name%`` under SQL ``LIKE``, groups by person
    and orders the groups by item count, highest first. The fetched rows
    are ``(count, person.name, person.id)`` and are stored in ``out``.

    :param name: substring to look for in ``person.name``; any SQL ``LIKE``
        wildcards it contains are still interpreted as wildcards.
    """
    # The pattern is passed as a bound parameter instead of being formatted
    # into the SQL text, so a name containing a quote (e.g. O'Brien) can
    # neither break nor alter the statement.
    sqltext = '''SELECT count(*), person.name, person.id FROM item_2_person
        JOIN person ON item_2_person.person_id = person.id
        WHERE person.name LIKE :pattern
        GROUP BY person.id, person.name
        ORDER BY count(*) DESC
        '''
    engine = model.metadata.bind
    pattern = '%' + name + '%'
    out = engine.execute(sql.text(sqltext), pattern=pattern).fetchall()
def plot_pd_over_time():
    """Plot the ``fast_pd_year`` series over the per-year item totals.

    Both series come from the default ``pdw.stats.Stats`` object and are
    unpacked as (year, count) pairs. The totals are drawn as blue bars, the
    ``fast_pd_year`` counts as red bars on the same axes, the x axis is
    limited to 1600-1960, and the figure is saved as ``pd-over-time.png``
    at the path produced by ``_fn``.
    """
    statistics = pdw.stats.Stats.default()
    pd_by_year = statistics.fast_pd_year()
    items_by_year = statistics.by_year_all()['item']

    # Local import, performed only after the statistics have been gathered.
    import matplotlib.pyplot as plt

    pd_years, pd_counts = zip(*pd_by_year)
    all_years, all_counts = zip(*items_by_year)

    # Totals first so that the red series is painted over them.
    plt.bar(all_years, all_counts, color='blue')
    plt.bar(pd_years, pd_counts, color='red', edgecolor='red')
    plt.xlim(xmin=1600, xmax=1960)

    target = _fn('pd-over-time.png')
    plt.savefig(target)
205 import pdw.model as model
207 # have to have died before jan 1st 70 years ago
209 entering_pd = model.Person.query.\
210 filter(model.Person.death_date_ordered<now-70).\
211 filter(model.Person.death_date_ordered>=now-70-1)
212 people = entering_pd.all()
213 for person in people[:10]:
214 print person.name, person.birth_date, person.death_date
221if __name__ == '__main__':
# Command-line entry point.
#   * Configures logging at INFO level.
#   * Collects the names of the plain functions in the current namespace
#     whose names do not start with '_'; these are the available actions and
#     are appended to the usage text.
#   * Parses a -c/--config option plus positional arguments, and checks that
#     an action and a config were given and that the action is a known name.
#   * Resolves the config path, sets PREFIX and OUTIDIR from the pdw
#     defaults, and calls the selected function with the remaining arguments.
# NOTE(review): `method` is used in the final call without a visible
# assignment -- presumably args[0]; confirm.
222 logging.basicConfig(level=logging.INFO)
223 local_methods = dict(locals())
224 local_methods = [ x for x in local_methods if
225 isinstance(local_methods[x], types.FunctionType) ]
226 local_methods = filter(lambda x: not x.startswith('_'), local_methods)
228 usage = '''%prog {action}
231 usage += ('\n '.join(local_methods))
232 parser = optparse.OptionParser(usage)
233 parser.add_option('-c', '--config')
234 options, args = parser.parse_args()
236 if not args or not options.config or not args[0] in local_methods:
241 cfg = os.path.abspath(options.config)
243 PREFIX = pdw.default_prefix()
244 OUTIDIR = pdw.default_outdir()
246 locals()[method](*args[1:])