#
# A basic "module" to translate an RDF resource to a Python dict
# Usage:
#   import rdf2dict
#   rdf2dict.get_dict(node_uri, {document_uri})
#
# By Alexandre Passant
# This code is public domain

import RDF

# Namespace URI for each short prefix.  Predicate URIs that start with one of
# these namespaces are stored in the result under the key "<prefix>_<local>".
prefixes = {
    'rdf' : 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'rdfs' : 'http://www.w3.org/2000/01/rdf-schema#',
    'owl' : 'http://www.w3.org/2002/07/owl#',
    'foaf' : 'http://xmlns.com/foaf/0.1/',
    'bio' : 'http://purl.org/vocab/bio/0.1/',
    'rel' : 'http://purl.org/vocab/relationship/',
    'doap' : 'http://usefulinc.com/ns/doap#',
    'geo' : 'http://www.w3.org/2003/01/geo/wgs84_pos#',
    'dc' : 'http://purl.org/dc/elements/1.1/',
    'dcterms' : 'http://purl.org/dc/terms/',
    'skos' : 'http://www.w3.org/2004/02/skos/core#',
    'sioc' : 'http://rdfs.org/sioc/ns#',
    'sioct' : 'http://rdfs.org/sioc/types#',
    'rss' : 'http://purl.org/rss/1.0/',
    'content' : 'http://purl.org/rss/1.0/modules/content/',
    'cc' : 'http://web.resource.org/cc/',
    'admin' : 'http://webns.net/mvcb/'
}


def get_dict(node_uri, document_uri = ''):
    """Parse an RDF/XML document and return a dict describing one node.

    node_uri     -- URI (string) of the resource to describe.
    document_uri -- URI of the document to parse.  When empty, it is derived
                    from node_uri by dropping the '#fragment' part, if any.

    Returns the dict built by _node_dict() for node_uri.
    """
    model = RDF.Model()
    parser = RDF.Parser(name="rdfxml")
    if not document_uri:
        # partition() leaves the URI untouched when it has no '#', whereas
        # slicing with find() (which returns -1) chopped off the last character.
        document_uri = node_uri.partition('#')[0]
    parser.parse_into_model(model, document_uri)
    return _node_dict(RDF.Node(uri_string = node_uri), model)


def _prefixed_name(predicate_uri):
    """Return "<prefix>_<local>" for a known namespace, else the URI unchanged.

    The longest matching namespace wins, so the result does not depend on
    dict ordering when one namespace is a prefix of another (e.g. 'rss' and
    'content').
    """
    best_prefix = None
    best_url = ''
    for prefix, url in prefixes.items():
        if predicate_uri.startswith(url) and len(url) > len(best_url):
            best_prefix, best_url = prefix, url
    if best_prefix is None:
        return predicate_uri
    return "%s_%s" % (best_prefix, predicate_uri[len(best_url):])


def _node_dict(uri, model, visited = None, next = 1):
    """Recursively build a dict from the statements whose subject is `uri`.

    uri     -- RDF node used as the subject of the statement search.
    model   -- RDF model that is searched with find_statements().
    visited -- list of nodes already expanded; shared by the whole recursion.
               A fresh list is created when omitted, so separate top-level
               calls do not see each other's visited nodes.
    next    -- when true, an already-visited object is expanded one more time
               (needed to get e.g. rdfs:label for a visited node); when false,
               such an object is only recorded by its URI, and blank nodes
               are skipped.

    The result has a 'uri' key when `uri` is a resource, plus one key per
    predicate (shortened through `prefixes` when possible) mapping to a list.
    For literal objects that list holds a single dict with the keys
    'literals', 'strings' and, per language tag, 'strings_<lang>'; for other
    objects it holds one nested dict per object.
    """
    if visited is None:
        visited = []

    ## Save URI for resources
    if uri.is_resource():
        result = {'uri': str(uri.uri)}
    else:
        result = {}

    ## Find related statements
    statements = model.find_statements(RDF.Statement(uri, None, None))
    if not statements.end():
        visited.append(uri)
        literals = {}
        for s in statements:
            ## Replace full URIs by prefix_thing
            p = _prefixed_name(str(s.predicate.uri))

            ## Create array if not exists
            if p not in result:
                result[p] = []

            ## Saving literals, strings, and strings by lang
            if s.object.is_literal():
                if p not in literals:
                    # One shared bucket per predicate, appended only once.
                    literals[p] = {'literals':[], 'strings':[]}
                    result[p].append(literals[p])
                literal = s.object.literal_value
                text = literal['string'].strip()
                literals[p]['literals'].append(literal)
                literals[p]['strings'].append(text)
                lang = literal['language']
                if lang:
                    # Test the key that is actually stored; testing the bare
                    # language tag reset this list on every literal.
                    lang_key = "strings_%s" % lang
                    if lang_key not in literals[p]:
                        literals[p][lang_key] = []
                    literals[p][lang_key].append(text)

            ## Loop if not visited URI, else do only a last step
            ## (needed to get rdfs:label for a visited node)
            elif s.object not in visited:
                result[p].append(_node_dict(s.object, model, visited, 1))
            elif next:
                result[p].append(_node_dict(s.object, model, visited, 0))
            elif not s.object.is_blank():
                # Append rather than assign, so values already collected for
                # this predicate are not discarded.
                result[p].append({'uri': str(s.object.uri)})
    return result