Package oort :: Module rdfview
[hide private]
[frames] | no frames]

Source Code for Module oort.rdfview

  1  # -*- coding: UTF-8 -*- 
  2  """This module contains a system for creating rdf query classes in a mainly  
  3  declarative manner. These are built by subclassing ``RdfQuery`` and creating  
  4  class attributes by using ``Selector``:s. There are several selectors provided  
  5  in this module to cover all regular cases of data acquisition. 
  6  """ 
  7  #======================================================================= 
  8  from itertools import chain 
  9  from types import ModuleType 
 10  import warnings 
 11  from rdflib import RDF, RDFS, Namespace, URIRef, BNode, Literal 
 12  from rdflib import ConjunctiveGraph 
 13  try: import simplejson 
 14  except ImportError: simplejson = None 
 15  from oort.util.code import contract 
 16  #======================================================================= 
 17   
 18   
# TODO: deprecate THIS_QUERY and then remove this?
# Sentinel sub query marker: when a selector's sub query is THIS_QUERY, the
# sub query resolves to the RdfQuery class the selector is hooked into.
THIS_QUERY = object()

# Separator used in string sub query markers of the form "module:name"; a
# marker without it is looked up in the module of the owning query class.
MODULE_SEP = ':'
 23   
 24   
 25  #----------------------------------------------------------------------- 
 26   
 27   
class Selector(object):
    """Base descriptor used as a class attribute on ``RdfQuery`` subclasses.

    A selector knows a predicate (given directly as a ``URIRef``, or derived
    from a namespace/module plus the attribute name when hooked into a query
    class), an optional sub query used to wrap the selected values, and a
    list of filters applied to the result. Results are computed lazily on
    first attribute access and stored in the instance's ``PreparedSelect``.
    """

    __slots__ = ('predicate', '_namespace', 'filters',
            '_subQueryMarker', '_finalSubQuery',
            '_name', '_queryClass')

    def __init__(self, predBase=None, subQuery=None):
        """Create a selector.

        predBase -- a ``Namespace`` or module (used to look up the predicate
            by attribute name later) or a ``URIRef`` (used as the predicate).
        subQuery -- a query class, a string marker or ``THIS_QUERY``;
            resolved lazily by ``get_sub_query``.
        """
        self.predicate = None
        self._namespace = None
        # Namespace is tested before URIRef on purpose; the order is kept
        # from the original code.
        if isinstance(predBase, Namespace) or isinstance(predBase, ModuleType):
            self._namespace = predBase
        elif isinstance(predBase, URIRef):
            self.predicate = predBase
        self._subQueryMarker = subQuery
        # False means "not resolved yet"; None is a valid resolved value.
        self._finalSubQuery = False
        self.filters = []

    @contract.state_change
    def hook_into_rdf_query(self, name, queryClass):
        """Bind this selector to attribute ``name`` of ``queryClass``.

        Falls back to the namespace of the query class when none was given,
        and derives the predicate from the namespace and the attribute name
        when no explicit predicate was set.
        """
        self._name = name
        self._queryClass = queryClass
        if not self._namespace:
            self._namespace = queryClass._namespace
        if not self.predicate and self._namespace:
            if isinstance(self._namespace, ModuleType):
                self.predicate = getattr(self._namespace, name)
            else:
                self.predicate = self._namespace[name]
        # TODO: not used since e.g. the selector decorator uses no predicate
        #if not self.predicate:
        #    raise ValueError(
        #            "Could not determine predicate for Selector %s" % self)

    def get_sub_query(self):
        """Return the resolved sub query (or a falsy marker if there is none).

        String markers are resolved once: ``"module:name"`` is imported from
        the named module, a bare ``"name"`` from the module of the owning
        query class. ``THIS_QUERY`` resolves to the owning query class. The
        resolved value is cached on the selector.
        """
        final = self._finalSubQuery
        if final is False:
            marker = self._subQueryMarker
            final = marker
            if isinstance(marker, basestring):
                if MODULE_SEP in marker:
                    module, marker = marker.split(MODULE_SEP)
                else:
                    module = self._queryClass.__module__
                final = __import__(module, fromlist=['']).__dict__[marker]
            elif marker is THIS_QUERY:
                final = self._queryClass
            self._finalSubQuery = final
        return final

    def __get__(self, rdfQueryInstance, rdfQueryOwnerClass=None):
        """Descriptor access: return the (lazily computed) selected value.

        Accessed on the class (instance is ``None``) the selector itself is
        returned.
        """
        # Identity test, not truthiness: an instance that happens to be falsy
        # (e.g. a subclass defining __len__) must still get its value.
        if rdfQueryInstance is None:
            return self
        prep = rdfQueryInstance._preparedSelects[self._name]
        if not prep.hasRun:
            result = self.retreive_result(rdfQueryInstance, prep.selectArgs)
            for fltr in self.filters:
                result = fltr(result)
            prep.hasRun = True
            prep.result = result
        return prep.result

    @contract.default_method
    def retreive_result(self, rdfQueryInstance, selectArgs):
        """Run ``select`` with ``selectArgs`` and wrap results in sub queries.

        ``selectArgs`` is the ``(graph, lang, subject)`` tuple of the
        prepared select; graph and lang are passed on for sub query creation.
        """
        result = self._process_for_subqueries(
                rdfQueryInstance,
                self.select(*selectArgs),
                *selectArgs[:2]
            )
        return result

    def _process_for_subqueries(self, rdfQueryInstance,
            rawresults, graph, lang):
        """Wrap raw select results in the sub query, if one is configured.

        Falsy raw results become ``[]`` (when a list was returned) or
        ``None``. Without a sub query the raw results are returned as is.
        """
        returnList = isinstance(rawresults, list)
        if not rawresults:
            if returnList: return []
            else: return None

        subQuery = self.get_sub_query()
        if not subQuery:
            return rawresults
        else:
            # TODO: using THIS_QUERY (and no execCache?) may currently cause
            # infinite loops..? But things are more lazy now; perhaps not..
            run_query = query_or_cached(subQuery, rdfQueryInstance._execCache)
            if returnList:
                return [run_query(graph, lang, uri) for uri in rawresults]
            else:
                return run_query(graph, lang, rawresults)

    @contract.template_method
    def select(self, graph, lang, subject):
        """Select raw value(s) from ``graph``; must be overridden.

        Implementations in this module return either a single value (or
        ``None``) or a list.
        """
        raise NotImplementedError

    @contract.default_method
    def back_to_graph(self, graph, subject, value):
        """Write ``value`` back to ``graph``; the default does nothing."""
        pass

    def viewed_as(self, subQuery):
        """Set the sub query marker and return this selector (for chaining)."""
        self._subQueryMarker = subQuery
        return self

    def __rshift__(self, subQuery):
        """``selector >> subQuery`` is shorthand for ``viewed_as``."""
        return self.viewed_as(subQuery)

    def add_filter(self, fltr):
        """Append a callable applied to the result on first access."""
        self.filters.append(fltr)

    def __or__(self, fltr):
        """``selector | fltr`` adds the filter in place and returns self."""
        self.add_filter(fltr)
        return self

    def __set__(self, rdfQueryInstance, value):
        """Descriptor assignment: store ``value`` as the selected result.

        Raw values are typed via ``type_raw_value``; when a sub query is
        configured the value(s) are turned into sub query instances with
        ``from_dict`` and a fresh ``BNode`` as subject.
        """
        prep = rdfQueryInstance._preparedSelects[self._name]
        lang = rdfQueryInstance._lang
        sub = self.get_sub_query()
        if isinstance(value, list):
            value = [self.type_raw_value(val, lang) for val in value]
        else:
            value = self.type_raw_value(value, lang)
        if sub:
            if isinstance(value, list):
                prep.result = [sub.from_dict(val, lang, BNode())
                        for val in value]
            else:
                prep.result = sub.from_dict(value, lang, BNode())
        else:
            prep.result = value
        prep.hasRun = True

    @contract.default_method
    def type_raw_value(self, value, lang):
        """Convert a raw assigned value; strings become plain ``Literal``s."""
        # TODO: need more clever type mapping. also, allow {'_uri': ".." } to be resource?
        if isinstance(value, basestring):
            value = Literal(value)
        return value
163 164
class PreparedSelect(object):
    """Per-instance state of one selector: its arguments and cached result."""

    __slots__ = ('selectArgs', 'result', 'hasRun')

    def __init__(self, graph, lang, subject):
        # Nothing has been selected yet; the result is filled in on first use.
        self.hasRun = False # TODO: make configurable?
        self.result = None
        self.selectArgs = (graph, lang, subject)
171 172
class _rdf_query_meta(type):
    """Metaclass for ``RdfQuery``: collects selectors and namespace defaults."""

    def __init__(cls, clsName, bases, clsDict):
        """Set up ``_selectors``, ``_namespace`` and a default ``RDF_TYPE``.

        - ``_selectors`` maps attribute names to selectors, merging those of
          the base classes with the ones declared in the class body.
        - ``_rdfbase_`` (own or first found on a base) becomes ``_namespace``
          when it is a ``Namespace``; ``RDF_TYPE`` then defaults to the class
          name in that namespace unless the class body sets it.
        """
        super(_rdf_query_meta, cls).__init__(clsName, bases, clsDict)

        cls._selectors = selectors = {}
        # Merge in reverse base order so that the first base wins on name
        # clashes, matching attribute lookup and the _rdfbase_ search below.
        for base in reversed(bases):
            if hasattr(base, '_selectors'):
                selectors.update(base._selectors)

        rdfBase = clsDict.get('_rdfbase_')
        if not rdfBase:
            for base in bases:
                if hasattr(base, '_rdfbase_'):
                    rdfBase = base._rdfbase_
                    break
        if isinstance(rdfBase, Namespace):
            cls._namespace = rdfBase
            if not clsDict.get('RDF_TYPE'):
                cls.RDF_TYPE = rdfBase[clsName]
        else:
            cls._namespace = None # TODO: pick from type?

        # Selectors declared in this class body override inherited ones.
        for key, value in clsDict.items():
            if isinstance(value, Selector):
                value.hook_into_rdf_query(key, cls)
                selectors[key] = value
199 200
class RdfQuery(object):
    """Base class for declarative views of a subject in an RDF graph.

    Subclasses declare ``Selector`` instances as class attributes; the
    metaclass collects them in ``_selectors``. An instance is bound to a
    graph, a language and a subject, and selector values are computed lazily
    on attribute access.
    """
    __metaclass__ = _rdf_query_meta

    RDF_TYPE = RDFS.Resource

    # TODO: test use of execCache properly (it seems to work though)

    def __init__(self, graph, lang, subject, execCache=None):
        """Bind the query to ``graph``, ``lang`` and ``subject``.

        ``execCache`` is an optional cache passed on to sub queries.
        """
        self._graph = graph
        self._subject = subject
        self._lang = lang
        self._preparedSelects = self._make_prepare_selects()
        self._execCache = execCache

    def _make_prepare_selects(self):
        """Return a dict of ``PreparedSelect`` objects keyed by selector name.

        The dict is empty when the subject is falsy.
        """
        graph, lang, subject = self._graph, self._lang, self._subject
        if not subject:
            # FIXME: happens when subject is a string/Literal - wrong in
            # itself! Remove or signal error? As it is, it leads to
            # illegible errors further down!
            # Also, why not: if subject == u'':
            return {}
        prepareds = {}
        for name in self._selectors:
            prepareds[name] = PreparedSelect(graph, lang, subject)
        return prepareds

    def __str__(self):
        return str(self._subject)

    def __eq__(self, other):
        """Equal to another query with the same subject, or to the subject."""
        if isinstance(other, RdfQuery):
            return self._subject == other._subject
        else:
            return self._subject == other

    def __ne__(self, other):
        # Python 2 does not derive != from ==; keep the two consistent.
        return not self.__eq__(other)

    def __hash__(self):
        # Hash like the subject so that objects comparing equal hash equal.
        return hash(self._subject)

    @classmethod
    def bound_with(cls, subject, lang=None):
        """Return a query callable with ``subject`` (and ``lang``) pre-bound.

        The returned function takes ``(graph, lang, subject)`` like a query
        class, but ignores the passed subject and prefers the bound ``lang``
        when one was given.
        """
        def bound_query(graph, _lang, _subject):
            return cls(graph, lang or _lang, subject)
        bound_query.query = cls
        bound_query.__name__ = cls.__name__
        return bound_query

    @classmethod
    def from_dict(cls, data, lang, subject):
        """Create a query on a new empty graph and set attributes from ``data``."""
        graph = ConjunctiveGraph()
        query = cls(graph, lang, subject)
        for k, v in data.items():
            setattr(query, k, v)
        return query

    @classmethod
    def find_by(cls, graph, lang, execCache=None, **kwargs):
        """Yield queries for all subjects having the given selector value.

        Exactly one keyword argument is expected: the selector name and the
        value to match for that selector's predicate.
        """
        assert len(kwargs) == 1
        name, value = list(kwargs.items())[0]
        predicate = cls._selectors[name].predicate
        for subject in graph.subjects(predicate, value):
            yield query_or_cached(cls, execCache)(graph, lang, subject)

    @property
    def uri(self):
        """The subject this query is bound to."""
        return self._subject

    def get_selected_value(self, name):
        """Return the stored result for selector ``name`` without running it."""
        return self._preparedSelects[name].result

    def to_graph(self, lgraph=None):
        """Write this query's types and selected values to a graph.

        ``lgraph`` is the graph to write to; a new ``ConjunctiveGraph`` is
        created when it is ``None``. Returns the graph written to.
        """
        # BNode() is always truthy, so no further emptiness check is needed.
        subject = self._subject or BNode() # FIXME: is this ok?

        # Identity test: an empty graph passed by the caller must be used,
        # not silently replaced by a new one.
        if lgraph is None:
            lgraph = ConjunctiveGraph()

        for t in self._graph.objects(subject, RDF.type):
            lgraph.add((subject, RDF.type, t))

        for selector in self._selectors.values():
            value = selector.__get__(self)
            if not value:
                continue
            selector.back_to_graph(lgraph, subject, value)

        # FIXME: why is this happening; how can we prevent it?
        # Iterate over a snapshot; removing while iterating the graph itself
        # is not safe.
        # NOTE(review): rdflib documents None in a remove() pattern as a
        # wildcard, so this may remove more than the one triple - confirm.
        for t in list(lgraph):
            if None in t: lgraph.remove(t)
        return lgraph

    def to_rdf(self):
        """Serialize ``to_graph()`` in the 'pretty-xml' format."""
        return self.to_graph().serialize(format='pretty-xml')

    def to_dict(self, keepSubject=False):
        """Return the selected values as a dict keyed by selector name.

        keepSubject -- when true, the subject is included unless it is falsy
            or a ``BNode``; a ``str`` value is used as the key, otherwise
            the key is 'resource'.

        Falsy selector values are left out. Sub query values are converted
        recursively, other values with ``unicode``.
        """
        d = {}
        if keepSubject:
            # TODO: sync with new property 'uri'
            if isinstance(keepSubject, str):
                subjectKey = keepSubject
            else:
                subjectKey = 'resource'
            subj = self._subject
            if subj and not isinstance(subj, BNode):
                d[subjectKey] = self._subject

        for selector in self._selectors.values():
            name = selector._name
            value = selector.__get__(self)
            if not value:
                continue
            if isinstance(value, dict):
                d[name] = dict([(key, self.__dict_convert(val, keepSubject))
                        for key, val in value.items()])
            elif hasattr(value, '__iter__'):
                d[name] = [self.__dict_convert(val, keepSubject)
                        for val in value]
            else:
                d[name] = self.__dict_convert(value, keepSubject)
            # TODO: handle xml literals
        return d

    def __dict_convert(self, value, keepSubject):
        """Convert one value for ``to_dict``."""
        if isinstance(value, RdfQuery):
            return value.to_dict(keepSubject)
        else:
            return unicode(value) # TODO: simple type conversions?

    def to_json(self, keepSubject=False):
        """Return ``to_dict`` as a JSON string.

        Raises ``NotImplementedError`` when simplejson is not available.
        """
        if simplejson:
            return simplejson.dumps(self.to_dict(keepSubject))
        else:
            raise NotImplementedError
329 330 331 #----------------------------------------------------------------------- 332 333 334 # Is the use of weakref fine enough (reasonably needed to avoid cyclic 335 # references and hence possible memory leaks)? 336 # See: <http://docs.python.org/lib/module-weakref.html> 337 from weakref import WeakValueDictionary 338
class ExecCache(object):
    """
    This is a query execution cache which reuses results for the same query,
    subject and language, avoiding multiple instances of the same query when
    given the same subject and lang.

    Results are held in a ``WeakValueDictionary``, so the cache itself does
    not keep them alive.
    """

    def __init__(self):
        self.cache = WeakValueDictionary()

    def __call__(self, query, graph, lang, subject):
        """Return the cached result for (query, subject, lang), creating it
        with ``query(graph, lang, subject, self)`` on a miss."""
        cache = self.cache
        key = (id(query), unicode(subject), lang)
        #key = (query, subject, lang)
        result = cache.get(key)
        # Only a missing entry is a miss; a cached result that happens to be
        # falsy must be reused, not recreated.
        if result is None:
            result = query(graph, lang, subject, self)
            cache[key] = result
        return result
356 357
def query_or_cached(rdfQuery, execCache):
    """Return a ``(graph, lang, uri)`` callable creating ``rdfQuery`` results.

    With a truthy ``execCache`` the returned function goes through the cache;
    otherwise ``rdfQuery`` itself is returned.
    """
    if not execCache:
        return rdfQuery

    def run_query(graph, lang, uri):
        return execCache(rdfQuery, graph, lang, uri)

    return run_query
def run_queries(queries, graph, lang, subject):
    """Yield the result of each query in ``queries`` for the same graph,
    language and subject, all run through one shared ``ExecCache``."""
    sharedCache = ExecCache()
    for rdfQuery in queries:
        yield sharedCache(rdfQuery, graph, lang, subject)
371 372 373 #----------------------------------------------------------------------- 374 375
class Filter(object):
    """Selector filter keeping only the items accepted by ``func``."""

    def __init__(self, func):
        self.func = func

    def __call__(self, items):
        # Delegates to the builtin so its return type rules are kept as is.
        accept = self.func
        return filter(accept, items)
381 382
class Sorter(object):
    """Selector filter returning a sorted copy of a list of items.

    obj -- either a callable computing the sort value of an item, or an
        attribute name whose value is the sort value (the item itself is
        used when it lacks the attribute), or None to sort the items
        themselves.
    reverse -- reverse the sorted result.
    ignoreCase -- lower-case the sort values (they must support ``lower()``).
    """

    def __init__(self, obj=None, reverse=False, ignoreCase=False):
        if callable(obj):
            self.attr = None
            self.func = obj
        else:
            self.attr = obj
            self.func = None
        self.reverse = reverse
        self.ignoreCase = ignoreCase

    def __call__(self, items):
        """Return a sorted copy of ``items``; the input is left untouched."""
        copy = items[:]
        # Key-based sort: the sort value is computed once per item instead of
        # on every comparison, and no comparator function is needed.
        copy.sort(key=self._sort_value)
        if self.reverse:
            copy.reverse()
        return copy

    def _sort_value(self, item):
        """Return the value ``item`` is ordered by."""
        if self.attr:
            value = getattr(item, self.attr, item)
        elif self.func:
            value = self.func(item)
        else:
            value = item
        if self.ignoreCase:
            value = value.lower()
        return value

    def sort(self, r1, r2):
        """Compare two items by sort value; returns -1, 0 or 1."""
        v1, v2 = self._sort_value(r1), self._sort_value(r2)
        return (v1 > v2) - (v1 < v2)
411 412 413 #----------------------------------------------------------------------- 414 415 416 # TODO: totally untested! 417 # - use: TypeSwitch(persons=Person, values=Literal) 418 # - a subclass of RdfQuery? Or affect selector..? Reasonably yes.. 419 # - should adapt to if stuff is a list or one thing (one or each) 420 # - how about localized? 421 # - also should be used as list *or*: 422 # - obj.switchedstuff.persons 423 #def type_switch(typeSelectors, default): 424 # rdfType = graph.value(resource, RDF.type, None, any=True) 425 # def select(graph, lang, resource, **kwargs): 426 # query = typeSelectors.get(rdfType, default) 427 # return query(graph, lang, resource, **kwargs) 428 # return select 429 430 431
def back_from_value(graph, subject, predicate, value):
    """Add ``(subject, predicate, value)`` to ``graph``.

    An ``RdfQuery`` value is linked by its subject and then written to the
    graph itself. List values are currently skipped.
    """
    if not isinstance(value, RdfQuery):
        if isinstance(value, list): # TODO: fix this
            return
        graph.add((subject, predicate, value))
        return
    graph.add((subject, predicate, value._subject))
    value.to_graph(graph)
439 440
class UnarySelector(Selector):
    """Selector base for single values; writes one triple back to the graph."""

    def back_to_graph(self, graph, subject, value):
        predicate = self.predicate
        back_from_value(graph, subject, predicate, value)
444
class EachSelector(Selector):
    """Selector base for multiple values; writes one triple per value back."""

    def back_to_graph(self, graph, subject, values):
        for item in values:
            back_from_value(graph, subject, self.predicate, item)
449 450
class one(UnarySelector):
    """Selects a single object of (subject, predicate)."""

    def select(self, graph, lang, subject):
        found = graph.value(subject, self.predicate, None, any=True)
        return found
454 455
class each(EachSelector):
    """Selects all objects of (subject, predicate) as a list."""

    def select(self, graph, lang, subject):
        objects = graph.objects(subject, self.predicate)
        return list(objects)
459 460
class one_where_self_is(Selector):
    """Selects a single subject that has this subject as object of the
    predicate (the inverse direction of ``one``)."""

    def select(self, graph, lang, subject):
        return graph.value(None, self.predicate, subject, any=True)

    def back_to_graph(self, graph, subject, value):
        # The value is a sub query instance when a sub query is configured,
        # otherwise the raw node returned by select; handle both.
        referrer = getattr(value, '_subject', value)
        back_from_value(graph, referrer, self.predicate, subject)
467 468
class each_where_self_is(Selector):
    """Selects all subjects that have this subject as object of the
    predicate (the inverse direction of ``each``)."""

    def select(self, graph, lang, subject):
        return list(graph.subjects(self.predicate, subject))

    def back_to_graph(self, graph, subject, values):
        for value in values:
            # Items are sub query instances when a sub query is configured,
            # otherwise the raw nodes returned by select; handle both.
            referrer = getattr(value, '_subject', value)
            back_from_value(graph, referrer, self.predicate, subject)
476 477
class collection(Selector):
    """Selects the items of the RDF collection(s) found as object(s) of
    (subject, predicate).

    multiple -- when true, the items of all collections found are chained
        into one list; otherwise only one collection is read.
    """

    def __init__(self, predBase=None, subQuery=None, multiple=False):
        Selector.__init__(self, predBase, subQuery)
        self.multiple = multiple

    def select(self, graph, lang, subject):
        if self.multiple:
            allItems = [graph.items(res)
                    for res in graph.objects(subject, self.predicate)]
            return list(chain(*allItems))
        else:
            return list(graph.items(
                    graph.value(subject, self.predicate, None, any=True)
                ))

    def back_to_graph(self, graph, subject, values):
        """Write ``values`` as one RDF collection hanging off the predicate.

        An empty sequence is written as ``rdf:nil``. Each list cell gets an
        ``rdf:first`` and an ``rdf:rest``; the last cell's rest is
        ``rdf:nil``.
        """
        values = list(values)
        if not values:
            graph.add((subject, self.predicate, RDF.nil))
            return
        cell = BNode()
        graph.add((subject, self.predicate, cell))
        lastIndex = len(values) - 1
        for index, value in enumerate(values):
            back_from_value(graph, cell, RDF.first, value)
            # Terminate on the last item itself; an extra cell without
            # rdf:first would make the list malformed.
            if index == lastIndex:
                rest = RDF.nil
            else:
                rest = BNode()
            graph.add((cell, RDF.rest, rest))
            cell = rest
505 506
class TypeLocalized(Selector):
    """Mixin typing assigned strings as literals in the query's language."""

    def type_raw_value(self, value, lang):
        if not isinstance(value, basestring):
            return value
        return Literal(value, lang)
512
class localized(TypeLocalized, UnarySelector):
    """Selects the value whose language equals ``lang``; when none matches,
    the first truthy value seen (or ``None``) is returned instead."""

    def select(self, graph, lang, subject):
        fallback = None
        for candidate in graph.objects(subject, self.predicate):
            if not fallback:
                fallback = candidate
            if getattr(candidate, 'language', None) == lang:
                return candidate
        return fallback
# TODO: This is a hackish solution; see also below (transparently using datatype).
# It also reduces the literal, making it irreversible (should store original value!).
try:
    from oort.util._genshifilters import language_filtered_xml
except ImportError as e:
    warnings.warn("Could not import _genshifilters. Error was: %r. The selector 'localized_xml' will not be available." % e)
else:
    class localized_xml(UnarySelector):
        """This selector removes any elements with an xml:lang other than the
        current language. It also supports the never standardized 'rdf-wrapper' in
        XML Literals, which is removed from the output.

        Important! This is currently tied to the Genshi Templating System, and may
        not work as expected in all cases."""

        def select(self, graph, lang, subject):
            # All filtering is delegated to language_filtered_xml.
            return language_filtered_xml(
                    graph.objects(subject, self.predicate), lang)

        def type_raw_value(self, value, lang):
            # Assigned strings are typed as XML literals, not plain literals.
            if isinstance(value, basestring):
                value = Literal(value, datatype=RDF.XMLLiteral)
            return value
546
class i18n_dict(Selector):
    """Selects all values of (subject, predicate) as a dict keyed by each
    value's ``language``."""

    def select(self, graph, lang, subject):
        literals = graph.objects(subject, self.predicate)
        # A later value replaces an earlier one with the same language.
        return dict((literal.language, literal) for literal in literals)

    def back_to_graph(self, graph, subject, value):
        predicate = self.predicate
        for language, text in value.items():
            graph.add((subject, predicate, Literal(text, lang=language)))
557 558
class each_localized(TypeLocalized, EachSelector):
    """Selects all values of (subject, predicate) whose language equals
    ``lang``."""

    def select(self, graph, lang, subject):
        # getattr, as in ``localized``: objects without a ``language``
        # attribute (non-literals) are compared as having language None
        # instead of raising AttributeError.
        return [value for value in graph.objects(subject, self.predicate)
                if getattr(value, 'language', None) == lang]
563 564 565 #----------------------------------------------------------------------- 566 567 568 # TODO: Though "widely used" (by me), I think this was a little premature. 569 # There seems little use for this that a regular property can't do (getting the 570 # graph, lang and subject from self -- where needed). Perhaps I should include 571 # a memoized codeutil so it's easy to create lazily calculated bigger things. 572 # 573 # Even worse, this "utility" bypasses _process_for_subqueries, which is 574 # intricate and very close to the implementation. And this is the only reason 575 # retreive_result is marked as a "default_method"; it should reasonably be 576 # private. 577 #
class selector(Selector):
    """Decorator turning a method of an RdfQuery subclass into a selector.

    The method is called with the query instance followed by the select
    arguments (graph, lang, subject); its return value is the result.
    """

    def __init__(self, func):
        super(selector, self).__init__(None)
        self.func = func

    def retreive_result(self, rdfQueryInstance, selectArgs):
        # The decorated method produces the result directly.
        method = self.func
        return method(rdfQueryInstance, *selectArgs)

    @classmethod
    def filtered_by(cls, *filters):
        """Return a decorator like ``selector`` that also adds ``filters``."""
        def decorator(func):
            wrapped = cls(func)
            for resultFilter in filters:
                wrapped.add_filter(resultFilter)
            return wrapped
        return decorator
593 594 595 #----------------------------------------------------------------------- 596 # TODO: consider returning ElementTree data for XML Literals. And if so, also 597 # filtered on u'{http://www.w3.org/XML/1998/namespace}lang' (keep if none) for localized. 598 599 # TODO: also consider checking datatype and coercing (at least) these: 600 # Use: rdflib.Literal.castPythonToLiteral 601 # See: rdflib.sparql.sparqlOperators.getLiteralValue(v) 602 # See: <http://en.wikipedia.org/wiki/RDFLib#RDF_Literal_Support> 603 # - NOTE: Isn't this done automatically by rdflib now? I believe so. 604 605 606 #----------------------------------------------------------------------- 607 608
class QueryContext(object):
    """
    A query context, used to provide a managed context for query execution.

    Initialized with:

        - graph
        - language or getter for language
        - a set of queries or modules containing queries
          Accessible as attributes on the context or via view_for using RDF_TYPE

    """

    def __init__(self, graph, langobj, queries=None, query_modules=None):
        """Set up the context.

        graph -- the graph all queries are run against.
        langobj -- a language value, or a callable returning the language.
        queries -- optional iterable of query classes, registered by name.
        query_modules -- optional iterable of modules; every ``RdfQuery``
            subclass found in a module's namespace is registered by name.
        """
        self._graph = graph
        self._execCache = ExecCache()

        if callable(langobj):
            get_lang = langobj
        else:
            def get_lang(): return langobj
        self._get_lang = get_lang

        self._querydict = querydict = {}
        if queries:
            for query in queries:
                querydict[query.__name__] = query
        if query_modules:
            for module in query_modules:
                for name, obj in module.__dict__.items():
                    if isinstance(obj, type) and issubclass(obj, RdfQuery):
                        querydict[name] = obj

        # Queries sharing an RDF_TYPE overwrite each other here.
        self._queryTypeMap = {}
        for query in querydict.values():
            self._queryTypeMap[query.RDF_TYPE] = query

    def __getattr__(self, name):
        """Return a ``PreparedQuery`` for the registered query named ``name``.

        Raises ``AttributeError`` for unknown names.
        """
        # Read the registry from the instance dict: going through normal
        # attribute access would re-enter __getattr__ without end when
        # _querydict is not set yet (e.g. on an instance made by __new__).
        querydict = self.__dict__.get('_querydict')
        if querydict is None or name not in querydict:
            raise AttributeError("%s has no attribute '%s'" % (self, name))
        return self._prepared_query(querydict[name])

    def view_for(self, uriref):
        """Return a query instance for ``uriref`` chosen by its rdf:type.

        The first type of ``uriref`` that has a registered query is used.
        Raises ``KeyError`` when no type has one.
        """
        for typeref in self._graph.objects(uriref, RDF.type):
            query = self._queryTypeMap.get(typeref)
            if query:
                return self._prepared_query(query)(uriref)
        raise KeyError("%s has no query for type '%s'" % (self, uriref))

    def _prepared_query(self, query):
        return self.PreparedQuery(self, query)

    class PreparedQuery(object):
        """A query class bound to a context's graph, language and cache."""

        __slots__ = ('query', 'context')

        def __init__(self, context, query):
            self.context = context
            self.query = query

        def __call__(self, subject):
            """Return the (cached) query instance for ``subject``."""
            cx = self.context
            return cx._execCache(self.query, cx._graph, cx._get_lang(), subject)

        def find_all(self):
            """Yield a view for every subject typed as the query's RDF_TYPE.

            Each view is picked with the context's ``view_for``.
            """
            # NOTE(review): view_for picks the query by the subject's types,
            # which may differ from self.query - confirm this is intended.
            cx = self.context
            for subject in cx._graph.subjects(RDF.type, self.query.RDF_TYPE):
                yield cx.view_for(subject)

        def find_by(self, **kwargs):
            """Delegate to the query's ``find_by`` using the context's graph,
            language and cache."""
            cx = self.context
            return self.query.find_by(cx._graph, cx._get_lang(),
                    execCache=cx._execCache, **kwargs)
683