package org.autoplot.pds; import com.opencsv.exceptions.CsvValidationException; import gov.nasa.pds.label.Label; import gov.nasa.pds.label.object.ArrayObject; import gov.nasa.pds.label.object.FieldDescription; import gov.nasa.pds.label.object.TableObject; import gov.nasa.pds.label.object.TableRecord; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.net.URL; import java.text.ParseException; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import org.autoplot.datasource.AbstractDataSource; import org.autoplot.datasource.DataSetURI; import org.autoplot.datasource.URISplit; import org.autoplot.metatree.MetadataUtil; import org.das2.datum.DatumRangeUtil; import org.das2.datum.DatumUtil; import org.das2.datum.NumberUnits; import org.das2.datum.TimeUtil; import org.das2.datum.Units; import org.das2.datum.UnitsUtil; import org.das2.qds.ArrayDataSet; import org.das2.qds.DDataSet; import org.das2.qds.MutablePropertyDataSet; import org.das2.qds.QDataSet; import org.das2.qds.ops.Ops; import org.das2.qds.util.DataSetBuilder; import org.das2.util.monitor.NullProgressMonitor; import org.das2.util.monitor.ProgressMonitor; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; import org.xml.sax.SAXException; /** * PDS4 file source. This is pointed at PDS4 xml label files and will return the data * they describe. Labels whose document element is Product_Bundle or Product_Collection * are handled by the stubs getDataSetFromBundle and getDataSetFromCollection. 
* @author jbf */ public class PdsDataSource extends AbstractDataSource { /** * Create the data source for the given URI. * @param uri the URI, which is passed to the superclass constructor. */ public PdsDataSource(URI uri) { super(uri); } /** * Read the next record from the table. A newer version of the PDS library throws * CsvValidationException from readNext. The try/catch which wrapped that exception * to look like an IOException is commented out, so this simply delegates to * t.readNext() and declares Exception. * TODO: review this. * @param t the table * @return the TableRecord returned by t.readNext() * @throws Exception whatever t.readNext() throws */ private static TableRecord readNextTableRecord(TableObject t ) throws Exception { //try { return t.readNext(); //} catch ( CsvValidationException ex ) { // throw new IOException(ex); //} } /** * bootstrap routine for getting data from fields of a TableObject. The values are * collected in a rank 2 DataSetBuilder with one column per requested name. * TODO: rewrite so that multiple fields are read at once. * NOTE(review): in this copy of the file the text between the start of the first loop * and the parameter list ending in "axisNames )" appears to be missing; confirm the body * against the original. * @param t the table to read from * @param columnNames the names of the fields to read * @return the dataset read from the table * @throws Exception */ private QDataSet getFromTable( TableObject t, String[] columnNames ) throws Exception { int ncols= columnNames.length; int[] icols= new int[ncols]; DataSetBuilder dsb= new DataSetBuilder(2,100,ncols); for ( int i=0; i axisNames ) throws XPathExpressionException { XPathFactory factory= XPathFactory.newInstance(); XPath xpath= factory.newXPath(); String name = (String)xpath.evaluate( "axis_name", n, XPathConstants.STRING ); Double sequence_number = (Double)xpath.evaluate( "sequence_number", n, XPathConstants.NUMBER ); axisNames.put( sequence_number.intValue(), name ); } /** * return the name of the independent parameter that works in this axis. * This currently assumes the first node with this axisName is the * independent axis. * * For example, with https://space.physics.uiowa.edu/voyager/data/voyager-2-pws-wf/data/1987/vg2_pws_wf_1987-04-21T17_v1.0.xml, * if axisName=='time' then the result will be "Epoch" * * This shows where this logic fails: * https://pds-ppi.igpp.ucla.edu/data/maven-swea-calibrated/data/arc_pad/2016/03/mvn_swe_l2_arcpad_20160316_v04_r01.xml * For this file, I had to kludge in a test for the pitch angles. * * @param doc the xml document * @param axisName the axis name * @return null or the independent variable for the axis. 
* @throws javax.xml.xpath.XPathExpressionException */ public static String resolveIndependentAxis( Document doc, String axisName ) throws XPathExpressionException { XPathFactory factory= XPathFactory.newInstance(); XPath xpath= factory.newXPath(); String s= "Product_Observational/File_Area_Observational/Array[Axis_Array/axis_name='"+axisName +"']"; NodeList oo= (NodeList) xpath.evaluate( s, doc, XPathConstants.NODESET ); // jbf: I don't see how one can resolve the independent parameter properly. // I'll go through and find the lowest rank data with the axis. // "pitch angle" -> "pa" if ( oo.getLength()>0 ) { int best=0; for ( int i=0; i seekDependencies( Document doc, List depend ) throws XPathExpressionException { if ( depend.size()==1 ) { // always will have one element. XPathFactory factory= XPathFactory.newInstance(); XPath xpath= factory.newXPath(); String name= depend.get(0); Map axisNames= new LinkedHashMap<>(); NodeList oo= (NodeList) xpath.evaluate( "//Product_Observational/File_Area_Observational/Array[name='"+name+"']/Axis_Array", doc, XPathConstants.NODESET ); for ( int i=0; i(depend); depend.add(0,n1); depend.add(1,n2); depend.add(2,n3); if ( n4!=null && !n4.equals(name) ) { depend.add(3,n4); } } else if ( axisNames.get(3)!=null ) { String n1= resolveIndependentAxis( doc, axisNames.get(1) ); String n2= resolveIndependentAxis( doc, axisNames.get(2) ); String n3= resolveIndependentAxis( doc, axisNames.get(3) ); depend= new LinkedList<>(depend); depend.add(0,n1); depend.add(1,n2); if ( n3!=null && !n3.equals(name) ) { depend.add(2,n3); } } else if ( axisNames.get(2)!=null ) { String n1= resolveIndependentAxis( doc, axisNames.get(1) ); String n2= resolveIndependentAxis( doc, axisNames.get(2) ); depend= new LinkedList<>(depend); depend.add(0,n1); if ( n2!=null && !n2.equals(name) ) { depend.add(1,n2); } } else if ( axisNames.get(1)!=null ) { String n1= resolveIndependentAxis( doc, axisNames.get(1) ); depend= new LinkedList<>(depend); if ( !n1.equals(name) ) 
{ depend.add(0,n1); } } } return depend; } /** * given the bundle, figure out which files should be loaded to implement the time range. The intent is to call recursively * into this code for each item, but that is not implemented. This stub evaluates the lidvid_reference text of * Product_Bundle/Bundle_Member_Entry as a string and returns it as a dataset with nominal units. * //TODO: implement me * @param doc the xml document * @param mon progress monitor, which is not used by this stub * @return the dataset made by Ops.dataset from the lidvid string and Units.nominal() * @throws IllegalArgumentException when the lidvid string is empty after trimming * @throws Exception */ public org.das2.qds.QDataSet getDataSetFromBundle(Document doc,ProgressMonitor mon) throws Exception { XPathExpression xp= XPathFactory.newInstance().newXPath().compile( "//Product_Bundle/Bundle_Member_Entry/lidvid_reference/text()"); String lidvid= (String)xp.evaluate( doc, XPathConstants.STRING ); if ( lidvid.trim().length()==0 ) { throw new IllegalArgumentException("lidvid is empty or not found at "+ "//Product_Bundle/Bundle_Member_Entry/lidvid_reference/text()"); } return Ops.dataset(lidvid,Units.nominal()); } /** * given the collection, figure out which files should be loaded to implement the time range. The intent is to call recursively * into this code for each item, but that is not implemented. This stub evaluates the file_name text of * Product_Collection/File_Area_Inventory/File as a string and returns it as a dataset with nominal units. 
* //TODO: implement me * @param doc the xml document * @param mon progress monitor, which is not used by this stub * @return the dataset made by Ops.dataset from the file name string and Units.nominal() * @throws IllegalArgumentException when the file name string is empty after trimming * @throws Exception */ public org.das2.qds.QDataSet getDataSetFromCollection(Document doc,ProgressMonitor mon) throws Exception { XPathExpression xp= XPathFactory.newInstance().newXPath().compile( "//Product_Collection/File_Area_Inventory/File/file_name/text()"); String csvfile= (String)xp.evaluate( doc, XPathConstants.STRING ); if ( csvfile.trim().length()==0 ) { throw new IllegalArgumentException("file name is empty or not found at "+ "//Product_Collection/File_Area_Inventory/File/file_name/text()"); } return Ops.dataset(csvfile,Units.nominal()); } /** * Read the data described by the label. The label file is fetched with DataSetURI.getFile and parsed with readXML. * When the document element is Product_Bundle or Product_Collection, the result of the corresponding stub is returned. * Otherwise the data file resolved by PdsDataSourceFactory.getFileResource is fetched, the label is opened, and the names * given by the X, Y, Z and arg_0 parameters are collected and passed through seekDependencies. When no result has been * selected, one dataset is returned as is and two to four datasets are combined with Ops.link; if linking three or four * datasets fails, the last dataset is returned with its DEPEND_1 (and for four, DEPEND_2) property cleared. A result which * is a MutablePropertyDataSet is made immutable before it is returned. * NOTE(review): parts of this method body appear to be missing in this copy of the file; confirm the details against the original. * @param mon progress monitor * @return the dataset, selected or linked as described above * @throws Exception */ @Override public org.das2.qds.QDataSet getDataSet(ProgressMonitor mon) throws Exception { String name= getParam("arg_0",""); URISplit split= URISplit.parse( getURI() ); File xmlfile = DataSetURI.getFile( split.resourceUri.toURL() ,new NullProgressMonitor()); Document doc= readXML(xmlfile); if ( doc.getDocumentElement().getNodeName().equals("Product_Bundle") ) { return getDataSetFromBundle(doc,mon); } if ( doc.getDocumentElement().getNodeName().equals("Product_Collection")) { return getDataSetFromCollection(doc,mon); } URL fileUrl= PdsDataSourceFactory.getFileResource( split.resourceUri.toURL(), mon ); DataSetURI.getFile(fileUrl,mon ); Label label = Label.open( xmlfile.toURI().toURL() ); List names= new ArrayList<>(); String X= getParam("X",""); if ( !X.equals("") ) { names.add(X); } String Y= getParam("Y",""); if ( !Y.equals("") ) { names.add(Y); } String Z= getParam("Z",""); if ( !Z.equals("") ) { names.add(Z); } if ( !name.equals("") ) { names.add(name); } List names1= seekDependencies(doc, names ); boolean okay= true; for ( int i=0; i tableColumnNames= new ArrayList<>(); List datasetColumnIndexes= new ArrayList<>(); for ( int i=0; i0 ) { QDataSet bresults= getFromTable( t, tableColumnNames.toArray(new String[tableColumnNames.size()]) ); for ( int iii=0; iii0 ) { result1.putProperty( QDataSet.UNITS, 
Units.lookupUnits(sunits) ); } if ( units==null || !UnitsUtil.isTimeLocation(units) ) { String labl= (String) xpath.evaluate( "//Product_Observational/File_Area_Observational/Table_Character/Record_Character/Field_Character[name='"+name+"']/name/text()", doc ); // TODO: Stupid, isn't this? if ( labl.length()==0 ) labl= name; ((MutablePropertyDataSet)results[i]).putProperty( QDataSet.LABEL, labl ); String title= (String) xpath.evaluate( "//Product_Observational/File_Area_Observational/Table_Character/Record_Character/Field_Character[name='"+name+"']/description/text()", doc ); if ( title.length()>0 ) { title= DocumentUtil.createTitleFrom(title); result1.putProperty( QDataSet.TITLE, title ); } String sfillValue= (String) xpath.evaluate( "//Product_Observational/File_Area_Observational/Table_Character/Record_Character/Field_Character[name='"+name+"']/Special_Constants/invalid_constant/text()", doc ); if ( sfillValue.length()==0 ) sfillValue= (String) xpath.evaluate( "//Product_Observational/File_Area_Observational/Table_Character/Record_Character/Field_Character[name='"+name+"']/Special_Constants/missing_constant/text()", doc ); String svalidMax= (String) xpath.evaluate( "//Product_Observational/File_Area_Observational/Table_Character/Record_Character/Field_Character[name='"+name+"']/Special_Constants/valid_maximum/text()", doc ); String svalidMin= (String) xpath.evaluate( "//Product_Observational/File_Area_Observational/Table_Character/Record_Character/Field_Character[name='"+name+"']/Special_Constants/valid_minimum/text()", doc ); if ( sfillValue.trim().length()>0 ) { double fillValue= Double.parseDouble(sfillValue); result1.putProperty( QDataSet.FILL_VALUE, fillValue ); } if ( svalidMax.trim().length()>0 ) { double validMax= Double.parseDouble(svalidMax); result1.putProperty( QDataSet.VALID_MAX, validMax ); } if ( svalidMin.trim().length()>0 ) { double validMin= Double.parseDouble(svalidMin); result1.putProperty( QDataSet.VALID_MIN, validMin ); } } } } } } for ( 
int i=0; i0 && units==null ) { result1.putProperty( QDataSet.UNITS, Units.lookupUnits(sunits) ); } if ( units==null || !UnitsUtil.isTimeLocation(units) ) { String labl= (String) xpath.evaluate( "//Product_Observational/File_Area_Observational/Array[name='"+name+"']/name/text()", doc ); if ( labl.length()==0 ) labl= name; ((MutablePropertyDataSet)results[i]).putProperty( QDataSet.LABEL, labl ); String title= (String) xpath.evaluate( "//Product_Observational/File_Area_Observational/Array[name='"+name+"']/description/text()", doc ); if ( title.length()>0 ) { result1.putProperty( QDataSet.TITLE, title.trim() ); } } String sfillValue= (String) xpath.evaluate( "//Product_Observational/File_Area_Observational/Array[name='"+name+"']/Special_Constants/invalid_constant/text()", doc ); if ( sfillValue.length()==0 ) sfillValue= (String) xpath.evaluate( "//Product_Observational/File_Area_Observational/Array[name='"+name+"']/Special_Constants/missing_constant/text()", doc ); String svalidMax= (String) xpath.evaluate( "//Product_Observational/File_Area_Observational/Array[name='"+name+"']/Special_Constants/valid_maximum/text()", doc ); String svalidMin= (String) xpath.evaluate( "//Product_Observational/File_Area_Observational/Array[name='"+name+"']/Special_Constants/valid_minimum/text()", doc ); if ( sfillValue.trim().length()>0 ) { double fillValue= Double.parseDouble(sfillValue); result1.putProperty( QDataSet.FILL_VALUE, fillValue ); } if ( svalidMax.trim().length()>0 ) { double validMax= Double.parseDouble(svalidMax); result1.putProperty( QDataSet.VALID_MAX, validMax ); } if ( svalidMin.trim().length()>0 ) { double validMin= Double.parseDouble(svalidMin); result1.putProperty( QDataSet.VALID_MIN, validMin ); } } } } } if ( result==null ) { switch (results.length) { case 1: result= results[0]; break; case 2: result= Ops.link( results[0], results[1] ); break; case 3: try { result= Ops.link( results[0], results[1], results[2] ); } catch ( Exception ex ) { 
((MutablePropertyDataSet)results[2]).putProperty(QDataSet.DEPEND_1,null); result= results[2]; } break; case 4: try { result= Ops.link( results[0], results[1], results[2], results[3] ); } catch ( Exception ex ) { ((MutablePropertyDataSet)results[3]).putProperty(QDataSet.DEPEND_1,null); ((MutablePropertyDataSet)results[3]).putProperty(QDataSet.DEPEND_2,null); result= results[3]; } break; default: break; } } if ( result instanceof MutablePropertyDataSet ) { ((MutablePropertyDataSet)result).makeImmutable(); } return result; } /** * Return the content of the label as a map. The label file is fetched with DataSetURI.getFile, parsed with readXML, * and converted with DocumentUtil.convertDocumentToMap. * @param mon progress monitor used when fetching the label file * @return the map produced by DocumentUtil.convertDocumentToMap * @throws Exception */ @Override public Map getMetadata(ProgressMonitor mon) throws Exception { URISplit split= URISplit.parse( getURI() ); File xmlfile = DataSetURI.getFile( split.resourceUri.toURL() , mon ); Document doc= readXML(xmlfile); return DocumentUtil.convertDocumentToMap(doc); } }