// ***************************************************************************************************************************
// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
// * with the License.  You may obtain a copy of the License at                                                              *
// *                                                                                                                         *
// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
// *                                                                                                                         *
// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
// * specific language governing permissions and limitations under the License.                                              *
// ***************************************************************************************************************************
package org.apache.juneau.jena;

import static org.apache.juneau.internal.StringUtils.*;
import static org.apache.juneau.jena.Constants.*;

import java.io.IOException;
import java.util.*;

import org.apache.jena.rdf.model.*;
import org.apache.jena.util.iterator.*;
import org.apache.juneau.*;
import org.apache.juneau.parser.*;
import org.apache.juneau.transform.*;
import org.apache.juneau.xml.*;

/**
 * Session object that lives for the duration of a single use of {@link RdfParser}.
 *
 * <p>
 * This class is NOT thread safe.
 * It is typically discarded after one-time use although it can be reused against multiple inputs.
 */
@SuppressWarnings({"unchecked", "rawtypes"})
public class RdfParserSession extends ReaderParserSession {

	private final RdfParser ctx;

	// Predicates looked up repeatedly while walking the model:
	//   pRoot    - Juneau "root" marker property.
	//   pValue   - Juneau "value" property.
	//   pType    - Bean type-name property (created in the bean-property namespace).
	//   pRdfType - Standard rdf:type property.
	private final Property pRoot, pValue, pType, pRdfType;

	private final Model model;
	private final RDFReader rdfReader;

	// Resources that have already been expanded into a map/collection/bean by parseAnything().
	private final Set<Resource> urisVisited = new HashSet<>();

	/**
	 * Create a new session using properties specified in the context.
	 *
	 * @param ctx
	 * 	The context creating this session object.
	 * 	The context contains all the configuration settings for this object.
	 * @param args
	 * 	Runtime session arguments.
	 */
	protected RdfParserSession(RdfParser ctx, ParserSessionArgs args) {
		super(ctx, args);
		this.ctx = ctx;
		model = ModelFactory.createDefaultModel();
		addModelPrefix(ctx.getJuneauNs());
		addModelPrefix(ctx.getJuneauBpNs());
		pRoot = model.createProperty(ctx.getJuneauNs().getUri(), RDF_juneauNs_ROOT);
		pValue = model.createProperty(ctx.getJuneauNs().getUri(), RDF_juneauNs_VALUE);
		pType = model.createProperty(ctx.getJuneauBpNs().getUri(), RDF_juneauNs_TYPE);
		pRdfType = model.createProperty("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
		rdfReader = model.getReader(ctx.getLanguage());

		// Note: NTripleReader throws an exception if you try to set any properties on it.
		if (! ctx.getLanguage().equals(LANG_NTRIPLE)) {
			for (Map.Entry<String,Object> e : ctx.jenaProperties.entrySet())
				rdfReader.setProperty(e.getKey(), e.getValue());
		}
	}

	@Override /* ReaderParserSession */
	protected <T> T doParse(ParserPipe pipe, ClassMeta<T> type) throws IOException, ParseException, ExecutableException {

		// Load the entire input into the session model before locating the root node(s).
		rdfReader.read(model, pipe.getBufferedReader(), null);

		List<Resource> roots = getRoots(model);

		// Special case where we're parsing a loose collection of resources.
		if (isLooseCollections() && type.isCollectionOrArray()) {
			Collection c = null;
			if (type.isArray() || type.isArgs())
				c = new ArrayList();
			else
				c = (
					type.canCreateNewInstance(getOuter())
					? (Collection<?>)type.newInstance(getOuter())
					: new ObjectList(this)
				);

			int argIndex = 0;
			for (Resource resource : roots)
				c.add(parseAnything(type.isArgs() ? type.getArg(argIndex++) : type.getElementType(), resource,
					getOuter(), null));

			if (type.isArray() || type.isArgs())
				return (T)toArray(type, c);
			return (T)c;
		}

		if (roots.isEmpty())
			return null;
		if (roots.size() > 1)
			throw new ParseException(this, "Too many root nodes found in model:  {0}", roots.size());
		Resource resource = roots.get(0);

		return parseAnything(type, resource, getOuter(), null);
	}

	/*
	 * Registers the specified namespace as a prefix mapping on the session model.
	 */
	private final void addModelPrefix(Namespace ns) {
		model.setNsPrefix(ns.getName(), ns.getUri());
	}

	/*
	 * Decodes the specified string.
	 * If {@link RdfParser#RDF_trimWhitespace} is <jk>true</jk>, the resulting string is trimmed before decoding.
	 * If {@link #isTrimStrings()} is <jk>true</jk>, the resulting string is trimmed after decoding.
	 */
	private String decodeString(Object o) {
		if (o == null)
			return null;
		String s = o.toString();
		if (s.isEmpty())
			return s;
		if (isTrimWhitespace())
			s = s.trim();
		s = XmlUtils.decode(s, null);
		if (isTrimStrings())
			s = s.trim();
		return s;
	}

	/*
	 * Finds the roots in the model using either the "root" property to identify it,
	 * or by resorting to scanning the model for all nodes with no incoming predicates.
	 */
	private List<Resource> getRoots(Model m) {
		List<Resource> l = new ArrayList<>();

		// First try to find the root using the "http://www.apache.org/juneau/root" property.
		for (ResIterator i = m.listResourcesWithProperty(pRoot); i.hasNext();)
			l.add(i.next());

		if (! l.isEmpty())
			return l;

		// Otherwise, we need to find all resources that aren't objects.
		// We want to explicitly ignore statements where the subject
		// and object are the same node.
		Set<RDFNode> objects = new HashSet<>();
		for (StmtIterator i = m.listStatements(); i.hasNext();) {
			Statement st = i.next();
			RDFNode subject = st.getSubject();
			RDFNode object = st.getObject();
			if (object.isResource() && ! object.equals(subject))
				objects.add(object);
		}
		for (ResIterator i = m.listSubjects(); i.hasNext();) {
			Resource r = i.next();
			if (! objects.contains(r))
				l.add(r);
		}
		return l;
	}

	/*
	 * Populates the specified bean map from the statements attached to the specified resource.
	 *
	 * The resource URI (when present) is copied into the bean URI property if the bean declares one.
	 * Each predicate is matched to a bean property by its decoded local name.
	 * Predicates with no matching bean property are reported through onUnknownProperty(), except for
	 * the Juneau root and type marker properties which are silently skipped.
	 */
	private <T> BeanMap<T> parseIntoBeanMap(Resource r2, BeanMap<T> m) throws IOException, ParseException, ExecutableException {
		BeanMeta<T> bm = m.getMeta();
		RdfBeanMeta rbm = bm.getExtendedMeta(RdfBeanMeta.class);
		if (rbm.hasBeanUri() && r2.getURI() != null)
			rbm.getBeanUriProperty().set(m, null, r2.getURI());
		for (StmtIterator i = r2.listProperties(); i.hasNext();) {
			Statement st = i.next();
			Property p = st.getPredicate();
			String key = decodeString(p.getLocalName());
			BeanPropertyMeta pMeta = m.getPropertyMeta(key);
			setCurrentProperty(pMeta);
			if (pMeta != null) {
				RDFNode o = st.getObject();
				ClassMeta<?> cm = pMeta.getClassMeta();
				if (cm.isCollectionOrArray() && isMultiValuedCollections(pMeta)) {
					// Multi-valued format:  each statement contributes one element to the collection.
					ClassMeta<?> et = cm.getElementType();
					Object value = parseAnything(et, o, m.getBean(false), pMeta);
					setName(et, value, key);
					pMeta.add(m, key, value);
				} else {
					Object value = parseAnything(cm, o, m.getBean(false), pMeta);
					setName(cm, value, key);
					pMeta.set(m, key, value);
				}
			} else if (! (p.equals(pRoot) || p.equals(pType))) {
				onUnknownProperty(key, m);
			}
			setCurrentProperty(null);
		}
		return m;
	}

	/*
	 * Returns true if collections for the specified bean property use the MULTI_VALUED format.
	 * A non-DEFAULT format on the property takes precedence over the session-level setting.
	 */
	private boolean isMultiValuedCollections(BeanPropertyMeta pMeta) {
		RdfBeanPropertyMeta bpRdf = (pMeta == null ? RdfBeanPropertyMeta.DEFAULT : pMeta.getExtendedMeta(RdfBeanPropertyMeta.class));

		if (bpRdf.getCollectionFormat() != RdfCollectionFormat.DEFAULT)
			return bpRdf.getCollectionFormat() == RdfCollectionFormat.MULTI_VALUED;

		return getCollectionFormat() == RdfCollectionFormat.MULTI_VALUED;
	}

	/*
	 * Workhorse method.
	 * Converts the specified RDF node into an object of the specified expected type.
	 *
	 * eType is the expected type (defaults to Object when null).
	 * n is the node being parsed.
	 * outer is the outer object passed to instance-creation calls and set as the parent of the result.
	 * pMeta is the bean property currently being parsed, or null.
	 *
	 * Returns null for rdf:nil, and for map/collection/bean resources that have already been visited.
	 */
	private <T> T parseAnything(ClassMeta<?> eType, RDFNode n, Object outer, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException {

		if (eType == null)
			eType = object();
		PojoSwap<T,Object> swap = (PojoSwap<T,Object>)eType.getPojoSwap(this);
		BuilderSwap<T,Object> builder = (BuilderSwap<T,Object>)eType.getBuilderSwap(this);

		// sType is the type actually parsed from the node (builder type, swapped type, or eType itself).
		ClassMeta<?> sType = null;
		if (builder != null)
			sType = builder.getBuilderClassMeta(this);
		else if (swap != null)
			sType = swap.getSwapClassMeta(this);
		else
			sType = eType;
		setCurrentClass(sType);

		// If the type can't be instantiated directly, see if the resource carries a type-name property
		// that resolves to a more specific class.
		if (! sType.canCreateNewInstance(outer)) {
			if (n.isResource()) {
				Statement st = n.asResource().getProperty(pType);
				if (st != null) {
					String c = st.getLiteral().getString();
					ClassMeta tcm = getClassMeta(c, pMeta, eType);
					if (tcm != null)
						sType = eType = tcm;
				}
			}
		}

		Object o = null;
		if (n.isResource() && n.asResource().getURI() != null && n.asResource().getURI().equals(RDF_NIL)) {
			// Do nothing.  Leave o == null.
		} else if (sType.isObject()) {
			if (n.isLiteral()) {
				o = n.asLiteral().getValue();
				if (o instanceof String) {
					o = decodeString(o);
				}
			}
			else if (n.isResource()) {
				Resource r = n.asResource();
				if (! urisVisited.add(r))
					// Already expanded elsewhere...just refer to it by URI.
					o = r.getURI();
				else if (r.getProperty(pValue) != null) {
					o = parseAnything(object(), r.getProperty(pValue).getObject(), outer, null);
				} else if (isSeq(r)) {
					o = new ObjectList(this);
					parseIntoCollection(r.as(Seq.class), (Collection)o, sType, pMeta);
				} else if (isBag(r)) {
					o = new ObjectList(this);
					parseIntoCollection(r.as(Bag.class), (Collection)o, sType, pMeta);
				} else if (r.canAs(RDFList.class)) {
					o = new ObjectList(this);
					parseIntoCollection(r.as(RDFList.class), (Collection)o, sType, pMeta);
				} else {
					// If it has a URI and no child properties, we interpret this as an
					// external resource, and convert it to just a URL.
					String uri = r.getURI();
					if (uri != null && ! r.listProperties().hasNext()) {
						o = uri;
					} else {
						ObjectMap m2 = new ObjectMap(this);
						parseIntoMap(r, m2, null, null, pMeta);
						o = cast(m2, pMeta, eType);
					}
				}
			} else {
				throw new ParseException(this, "Unrecognized node type ''{0}'' for object", n);
			}
		} else if (sType.isBoolean()) {
			o = convertToType(getValue(n, outer), boolean.class);
		} else if (sType.isCharSequence()) {
			o = decodeString(getValue(n, outer));
		} else if (sType.isChar()) {
			o = parseCharacter(decodeString(getValue(n, outer)));
		} else if (sType.isNumber()) {
			o = parseNumber(getValue(n, outer).toString(), (Class<? extends Number>)sType.getInnerClass());
		} else if (sType.isMap()) {
			Resource r = n.asResource();
			if (! urisVisited.add(r))
				return null;
			Map m = (sType.canCreateNewInstance(outer) ? (Map)sType.newInstance(outer) : new ObjectMap(this));
			o = parseIntoMap(r, m, eType.getKeyType(), eType.getValueType(), pMeta);
		} else if (sType.isCollectionOrArray() || sType.isArgs()) {
			// Arrays and args are collected into a temporary list and converted afterwards.
			if (sType.isArray() || sType.isArgs())
				o = new ArrayList();
			else
				o = (sType.canCreateNewInstance(outer) ? (Collection<?>)sType.newInstance(outer) : new ObjectList(this));
			Resource r = n.asResource();
			if (! urisVisited.add(r))
				return null;
			if (isSeq(r)) {
				parseIntoCollection(r.as(Seq.class), (Collection)o, sType, pMeta);
			} else if (isBag(r)) {
				parseIntoCollection(r.as(Bag.class), (Collection)o, sType, pMeta);
			} else if (r.canAs(RDFList.class)) {
				parseIntoCollection(r.as(RDFList.class), (Collection)o, sType, pMeta);
			} else {
				throw new ParseException(this, "Unrecognized node type ''{0}'' for collection", n);
			}
			if (sType.isArray() || sType.isArgs())
				o = toArray(sType, (Collection)o);
		} else if (builder != null) {
			Resource r = n.asResource();
			if (! urisVisited.add(r))
				return null;
			BeanMap<?> bm = toBeanMap(builder.create(this, eType));
			o = builder.build(this, parseIntoBeanMap(r, bm).getBean(), eType);
		} else if (sType.canCreateNewBean(outer)) {
			Resource r = n.asResource();
			if (! urisVisited.add(r))
				return null;
			BeanMap<?> bm = newBeanMap(outer, sType.getInnerClass());
			o = parseIntoBeanMap(r, bm).getBean();
		} else if (sType.isUri() && n.isResource()) {
			o = sType.newInstanceFromString(outer, decodeString(n.asResource().getURI()));
		} else if (sType.canCreateNewInstanceFromString(outer)) {
			o = sType.newInstanceFromString(outer, decodeString(getValue(n, outer)));
		} else if (n.isResource()) {
			// Last resort:  parse into a generic map and cast it if it carries a bean type property.
			Resource r = n.asResource();
			Map m = new ObjectMap(this);
			parseIntoMap(r, m, sType.getKeyType(), sType.getValueType(), pMeta);
			if (m.containsKey(getBeanTypePropertyName(eType)))
				o = cast((ObjectMap)m, pMeta, eType);
			else
				throw new ParseException(this, "Class ''{0}'' could not be instantiated.  Reason: ''{1}''", sType.getInnerClass().getName(), sType.getNotABeanReason());
		} else {
			throw new ParseException(this, "Class ''{0}'' could not be instantiated.  Reason: ''{1}''", sType.getInnerClass().getName(), sType.getNotABeanReason());
		}

		if (swap != null && o != null)
			o = unswap(swap, o, eType);

		if (outer != null)
			setParent(eType, o, outer);

		return (T)o;
	}

	/*
	 * Returns true if the node is a resource whose rdf:type statement points to rdf:Seq.
	 */
	private boolean isSeq(RDFNode n) {
		return hasRdfType(n, RDF_SEQ);
	}

	/*
	 * Returns true if the node is a resource whose rdf:type statement points to rdf:Bag.
	 */
	private boolean isBag(RDFNode n) {
		return hasRdfType(n, RDF_BAG);
	}

	/*
	 * Returns true if the node is a resource with an rdf:type statement whose object has the specified URI.
	 * Only the single statement returned by getProperty() is inspected.
	 */
	private boolean hasRdfType(RDFNode n, String typeUri) {
		if (n.isResource()) {
			Statement st = n.asResource().getProperty(pRdfType);
			if (st != null)
				return typeUri.equals(st.getResource().getURI());
		}
		return false;
	}

	/*
	 * Resolves the simple value of a node.
	 * Literals yield their value directly.
	 * Resources are resolved through their Juneau "value" property, recursing if that value is itself not a literal.
	 */
	private Object getValue(RDFNode n, Object outer) throws IOException, ParseException, ExecutableException {
		if (n.isLiteral())
			return n.asLiteral().getValue();
		if (n.isResource()) {
			Statement st = n.asResource().getProperty(pValue);
			if (st != null) {
				n = st.getObject();
				if (n.isLiteral())
					return n.asLiteral().getValue();
				return parseAnything(object(), st.getObject(), outer, null);
			}
		}
		throw new ParseException(this, "Unknown value type for node ''{0}''", n);
	}

	/*
	 * Populates the specified map from the statements attached to the specified resource.
	 *
	 * The resource URI (when present) is stored under the "uri" key.
	 * Every other statement becomes an entry keyed by the decoded local name of its predicate,
	 * except for the Juneau root marker property which is skipped.
	 */
	private <K,V> Map<K,V> parseIntoMap(Resource r, Map<K,V> m, ClassMeta<K> keyType,
			ClassMeta<V> valueType, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException {
		// Add URI as "uri" to generic maps.
		if (r.getURI() != null) {
			K uri = convertAttrToType(m, "uri", keyType);
			V value = convertAttrToType(m, r.getURI(), valueType);
			m.put(uri, value);
		}
		for (StmtIterator i = r.listProperties(); i.hasNext();) {
			Statement st = i.next();
			Property p = st.getPredicate();

			// Skip the Juneau root marker.
			// Note:  Property.getURI() is the full URI (namespace + local name), so it must not be compared
			// against the bare namespace URI.  Compare against the root property itself instead.
			if (p.equals(pRoot))
				continue;

			String key = decodeString(p.getLocalName());
			RDFNode o = st.getObject();
			K key2 = convertAttrToType(m, key, keyType);
			V value = parseAnything(valueType, o, m, pMeta);
			setName(valueType, value, key);
			m.put(key2, value);
		}
		return m;
	}

	/*
	 * Parses every member of the specified RDF container (Seq or Bag) and appends the results to the collection.
	 * For args types, each member is parsed using the type of the argument at the same position.
	 */
	private <E> Collection<E> parseIntoCollection(Container c, Collection<E> l,
			ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException {
		int argIndex = 0;
		for (NodeIterator ni = c.iterator(); ni.hasNext();) {
			E e = (E)parseAnything(type.isArgs() ? type.getArg(argIndex++) : type.getElementType(), ni.next(), l, pMeta);
			l.add(e);
		}
		return l;
	}

	/*
	 * Parses every element of the specified RDF list and appends the results to the collection.
	 * For args types, each element is parsed using the type of the argument at the same position.
	 */
	private <E> Collection<E> parseIntoCollection(RDFList list, Collection<E> l,
			ClassMeta<?> type, BeanPropertyMeta pMeta) throws IOException, ParseException, ExecutableException {
		int argIndex = 0;
		for (ExtendedIterator<RDFNode> ni = list.iterator(); ni.hasNext();) {
			E e = (E)parseAnything(type.isArgs() ? type.getArg(argIndex++) : type.getElementType(), ni.next(), l, pMeta);
			l.add(e);
		}
		return l;
	}

	//-----------------------------------------------------------------------------------------------------------------
	// Common properties
	//-----------------------------------------------------------------------------------------------------------------

	/**
	 * Configuration property:  RDF format for representing collections and arrays.
	 *
	 * @see RdfParser#RDF_collectionFormat
	 * @return
	 * 	RDF format for representing collections and arrays.
	 */
	protected final RdfCollectionFormat getCollectionFormat() {
		return ctx.getCollectionFormat();
	}

	/**
	 * Configuration property:  Default XML namespace for bean properties.
	 *
	 * @see RdfParser#RDF_juneauBpNs
	 * @return
	 * 	Default XML namespace for bean properties.
	 */
	protected final Namespace getJuneauBpNs() {
		return ctx.getJuneauBpNs();
	}

	/**
	 * Configuration property:  XML namespace for Juneau properties.
	 *
	 * @see RdfParser#RDF_juneauNs
	 * @return
	 * 	XML namespace for Juneau properties.
	 */
	protected final Namespace getJuneauNs() {
		return ctx.getJuneauNs();
	}

	/**
	 * Configuration property:  RDF language.
	 *
	 * @see RdfParser#RDF_language
	 * @return
	 * 	The RDF language to use.
	 */
	protected final String getLanguage() {
		return ctx.getLanguage();
	}

	/**
	 * Configuration property:  Collections should be serialized and parsed as loose collections.
	 *
	 * @see RdfParser#RDF_looseCollections
	 * @return
	 * 	<jk>true</jk> if collections of resources are handled as loose collections of resources in RDF instead of
	 * 	resources that are children of an RDF collection (e.g. Sequence, Bag).
	 */
	protected final boolean isLooseCollections() {
		return ctx.isLooseCollections();
	}

	//-----------------------------------------------------------------------------------------------------------------
	// Jena properties
	//-----------------------------------------------------------------------------------------------------------------

	/**
	 * Configuration property:  All Jena-related configuration properties.
	 *
	 * @return
	 * 	A map of all Jena-related configuration properties.
	 */
	protected final Map<String,Object> getJenaProperties() {
		return ctx.getJenaProperties();
	}

	//-----------------------------------------------------------------------------------------------------------------
	// Properties
	//-----------------------------------------------------------------------------------------------------------------

	/**
	 * Configuration property:  Trim whitespace from text elements.
	 *
	 * @see RdfParser#RDF_trimWhitespace
	 * @return
	 * 	<jk>true</jk> if whitespace in text elements will be automatically trimmed.
	 */
	protected final boolean isTrimWhitespace() {
		return ctx.isTrimWhitespace();
	}

	//-----------------------------------------------------------------------------------------------------------------
	// Other methods
	//-----------------------------------------------------------------------------------------------------------------

	@Override /* Session */
	public ObjectMap toMap() {
		return super.toMap()
			.append("RdfParserSession", new DefaultFilteringObjectMap()
			);
	}
}