#!/usr/bin/env python
# Copyright (c) 2005 Ron Alford
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
"""Clean up an RDF vocabulary (FOAF by default) towards a target OWL level.

The script loads a graph from the first command-line argument (or from the
FOAF namespace URI when no argument is given), applies the fixes selected on
the command line, and prints the resulting graph to stdout as RDF/XML.
"""

import logging

# Logging is configured before the remaining imports, as in the original
# script, so that the root handler exists before rdflib is imported.
logging.basicConfig()
log = logging.getLogger("FoafCleaner")

import sys
from optparse import OptionParser, OptionValueError

from rdflib import Graph, BNode, Literal, URIRef, Namespace
from rdflib import RDF, RDFS

FOAF = Namespace("http://xmlns.com/foaf/0.1/")
OWL = Namespace("http://www.w3.org/2002/07/owl#")
XSD = Namespace("http://www.w3.org/2001/XMLSchema#")

# Properties that aren't easily identifiable
DATAPROPS = [
    FOAF['givenname'],
    FOAF['sha1'],
    FOAF['title'],
]

OBJECTPROPS = [
    FOAF['phone'],
]

# Levels accepted by --level (compared after lower-casing and stripping).
_LEVELS = ("full", "dl", "lite")


def fix_imports(graph):
    """Remove extraneous owl:imports.

    Drops the two owl:imports triples whose subject is the FOAF namespace
    and whose object is the RDFS or the OWL schema URI.
    """
    log.info("Removing extraneous imports of RDFS and OWL")
    graph.remove((FOAF, OWL['imports'],
                  URIRef("http://www.w3.org/2000/01/rdf-schema")))
    graph.remove((FOAF, OWL['imports'],
                  URIRef("http://www.w3.org/2002/07/owl")))


def fix_class_types(graph, remove_extra):
    """Add OWL:Class to all classes.

    Every subject typed rdfs:Class additionally gets the type owl:Class.
    When *remove_extra* is true the rdfs:Class typing is removed afterwards.
    """
    log.info("Adding OWL:Class types to classes")
    # Snapshot the subjects: the graph is modified inside the loop.
    for class_ in list(graph.subjects(RDF.type, RDFS.Class)):
        log.debug("Adding OWL:Class to %s", class_)
        graph.add((class_, RDF.type, OWL['Class']))
        if remove_extra:
            graph.remove((class_, RDF.type, RDFS.Class))


def fix_prop_types(graph, remove_extra):
    """Add property types to all properties.

    Properties are sorted into annotation, datatype and object properties
    using a series of heuristics (see the inline comments), then each one is
    typed with the matching OWL property class.  A property can end up in
    more than one category.  When *remove_extra* is true, the rdfs:Resource,
    owl:Thing and rdfs:Literal ranges used for classification are removed,
    as is the rdf:Property typing of every classified property.
    """
    log.info("Adding property types to all properties")
    annotation_props = set()
    data_props = set(DATAPROPS)
    object_props = set(OBJECTPROPS)

    # Easy cases
    for prop in (list(graph.subjects(RDFS.range, RDFS.Resource)) +
                 list(graph.subjects(RDFS.range, OWL['Thing']))):
        object_props.add(prop)
        if remove_extra:
            graph.remove((prop, RDFS.range, RDFS.Resource))
            graph.remove((prop, RDFS.range, OWL['Thing']))

    # Snapshot first: triples may be removed while walking the results,
    # which is not safe on a lazy query result.
    for prop in list(graph.subjects(RDFS.range, RDFS.Literal)):
        data_props.add(prop)
        if remove_extra:
            graph.remove((prop, RDFS.range, RDFS.Literal))

    # inverseOf doesn't make sense for anything but ObjectProperties
    for prop1, prop2 in graph.subject_objects(OWL['inverseOf']):
        object_props.add(prop1)
        object_props.add(prop2)

    # Guessing that if the range is not rdfs:Literal nor in xsd space,
    # it's an ObjectProperty
    for prop, image in graph.subject_objects(RDFS.range):
        if (image != RDFS.Literal) and not image.startswith(XSD):
            object_props.add(prop)

    # Mark non rdf/rdfs/owl props used on terms as annotation properties
    term_types = (
        RDF.Property,
        RDFS.Class,
        OWL['Class'],
        OWL['AnnotationProperty'],
        OWL['DatatypeProperty'],
        OWL['ObjectProperty'],
        OWL['Ontology'],
    )
    for term_type in term_types:
        for subject in list(graph.subjects(RDF.type, term_type)):
            for prop in graph.predicates(subject):
                if not (prop.startswith(RDF.RDFNS) or
                        prop.startswith(RDFS.RDFSNS) or
                        prop.startswith(OWL)):
                    annotation_props.add(prop)

    # Guess that you have the same type as your parent or child properties
    for prop in graph.subjects(RDF.type, RDF.Property):
        relatives = (list(graph.subjects(RDFS.subPropertyOf, prop)) +
                     list(graph.objects(prop, RDFS.subPropertyOf)))
        for relative in relatives:
            if (((relative, RDF.type, OWL['AnnotationProperty']) in graph) or
                    (relative in annotation_props)):
                annotation_props.add(prop)
            if (((relative, RDF.type, OWL['DatatypeProperty']) in graph) or
                    (relative in data_props)):
                data_props.add(prop)
            if (((relative, RDF.type, OWL['ObjectProperty']) in graph) or
                    (relative in object_props)):
                object_props.add(prop)

    # Apply the classification, in the same order as before:
    # annotation, datatype, then object properties.
    marking_plan = (
        (annotation_props, "Marking %s as an AnnotationProperty",
         "AnnotationProperty"),
        (data_props, "Marking %s as a DatatypeProperty",
         "DatatypeProperty"),
        (object_props, "Marking %s as an ObjectProperty",
         "ObjectProperty"),
    )
    for props, message, owl_type in marking_plan:
        for prop in props:
            log.debug(message, prop)
            graph.add((prop, RDF.type, OWL[owl_type]))
            if remove_extra:
                graph.remove((prop, RDF.type, RDF.Property))


def fix_restrictions(graph, remove_extra):
    """Replace rdfs:Resource as a property range or domain.

    Every rdfs:range / rdfs:domain triple pointing at rdfs:Resource is
    removed.  Unless *remove_extra* is true, an equivalent triple pointing
    at owl:Thing is added in its place.
    """
    log.info("Fixing global property restrictions")
    for prop in list(graph.subjects(RDFS.range, RDFS.Resource)):
        log.debug("Changing range of %s to OWL:Thing", prop)
        graph.remove((prop, RDFS.range, RDFS.Resource))
        if not remove_extra:
            graph.add((prop, RDFS.range, OWL['Thing']))
    for prop in list(graph.subjects(RDFS.domain, RDFS.Resource)):
        log.debug("Changing domain of %s to OWL:Thing", prop)
        graph.remove((prop, RDFS.domain, RDFS.Resource))
        if not remove_extra:
            graph.add((prop, RDFS.domain, OWL['Thing']))


def remove_data_ifp(graph):
    """Remove the owl:InverseFunctionalProperty type from every subject
    that is typed owl:DatatypeProperty."""
    log.info("Removing InverseFunctionalProperty type of DatatypeProperties")
    for prop in list(graph.subjects(RDF.type, OWL['DatatypeProperty'])):
        ifp_triple = (prop, RDF.type, OWL['InverseFunctionalProperty'])
        if ifp_triple in graph:
            log.debug("Removing IFP from %s", prop)
            graph.remove(ifp_triple)


def remove_disjoints(graph):
    """Remove every owl:disjointWith triple from the graph."""
    log.info("Removing disjointWith")
    for triple in list(graph.triples((None, OWL['disjointWith'], None))):
        log.debug("Removing %s", triple)
        graph.remove(triple)


def remove_membershipClass_range(graph):
    """Remove the rdfs:Class range of foaf:membershipClass."""
    log.info("Removing range of foaf:membershipClass")
    graph.remove((FOAF['membershipClass'], RDFS.range, RDFS.Class))


def remove_name_subProp(graph):
    """Remove the statement that foaf:name is a sub-property of rdfs:label."""
    log.info("Removing subProperty link between foaf:name and rdfs:label")
    graph.remove((FOAF['name'], RDFS.subPropertyOf, RDFS.label))


def set_level(option, opt_str, value, parser):
    """optparse callback for --level: switch on the fixes for an OWL level.

    The levels are cumulative: "full" enables the class, import and property
    fixes; "dl" additionally enables the restriction, IFP, membership and
    name fixes; "lite" additionally removes disjointWith statements.  The
    value is matched case-insensitively after stripping whitespace.

    Raises:
        OptionValueError: if *value* is not one of Full, DL or Lite, so that
            optparse reports a usage error instead of silently ignoring it.
    """
    values = parser.values
    value = value.lower().strip()
    if value not in _LEVELS:
        raise OptionValueError(
            "option %s: invalid OWL level %r (choose from Full, DL, Lite)"
            % (opt_str, value))

    values.classes = True
    values.imports = True
    values.properties = True
    if value in ("dl", "lite"):
        values.restrictions = True
        values.ifps = True
        values.membership = True
        values.name = True
    if value == "lite":
        values.disjoints = True


def set_verbose(count):
    """Set the root logger level from the number of -v flags.

    One flag selects INFO, two or more select DEBUG.  A *count* of 0 or
    None (flag never given) leaves the level untouched.
    """
    count = count or 0
    root = logging.getLogger()
    if count > 1:
        root.setLevel(logging.DEBUG)
    elif count > 0:
        root.setLevel(logging.INFO)


def main():
    """Parse the command line, apply the selected fixes, print RDF/XML."""
    parser = OptionParser()
    parser.add_option("-l", "--level", action="callback", callback=set_level,
                      metavar="LEVEL", type="string",
                      help="Set target OWL LEVEL (Full, DL, Lite)")
    parser.add_option("-c", "--classes", dest="classes", default=False,
                      action="store_true",
                      help="Add OWL:Class statements to classes")
    parser.add_option("-i", "--imports", dest="imports", default=False,
                      action="store_true",
                      help="Remove extraneous imports of RDFS and OWL")
    parser.add_option("-p", "--properties", dest="properties", default=False,
                      action="store_true",
                      help="Add property types to properties")
    parser.add_option("-r", "--restrictions", dest="restrictions",
                      default=False, action="store_true",
                      help="Change range and domain of properties from "
                           "RDFS:Resource to OWL:Thing")
    parser.add_option("-d", "--data_ifps", dest="ifps", default=False,
                      action="store_true",
                      help="Remove IFP declarations from datatype properties")
    parser.add_option("-m", "--membership", dest="membership", default=False,
                      action="store_true",
                      help="Remove range of foaf:membershipClass")
    parser.add_option("-n", "--name", dest="name", default=False,
                      action="store_true",
                      help="Remove subProp link between foaf:name and "
                           "rdfs:label")
    parser.add_option("--disjoints", dest="disjoints", default=False,
                      action="store_true",
                      help="Remove disjointWith statements")
    parser.add_option("--clean", dest="clean", default=False,
                      action="store_true",
                      help="Remove extra type triples")
    # default=0 so the count is an int even when -v is never given.
    parser.add_option("-v", "--verbose", action="count", default=0,
                      help="Turn on verbose output (twice for very verbose)")
    (options, args) = parser.parse_args()

    set_verbose(options.verbose)

    # With no source argument, load from the FOAF namespace URI itself.
    if len(args) < 1:
        args = [FOAF]

    log.info("Loading %s", args[0])
    graph = Graph()
    graph.load(args[0])

    remove_extra = options.clean

    if options.imports:
        fix_imports(graph)
    if options.classes:
        fix_class_types(graph, remove_extra)
    if options.name:
        remove_name_subProp(graph)
    if options.properties:
        fix_prop_types(graph, remove_extra)
    if options.restrictions:
        fix_restrictions(graph, remove_extra)
    if options.ifps:
        remove_data_ifp(graph)
    if options.membership:
        remove_membershipClass_range(graph)
    if options.disjoints:
        remove_disjoints(graph)

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(graph.serialize(format="xml"))


if __name__ == '__main__':
    main()