package org.autoplot.jythonsupport; import java.util.Arrays; import java.util.HashSet; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.python.core.PySyntaxError; import org.python.parser.ast.If; import org.python.parser.ast.Module; import org.python.parser.ast.stmtType; import org.das2.util.LoggerManager; import org.python.parser.SimpleNode; import org.python.parser.ast.Assert; import org.python.parser.ast.Assign; import org.python.parser.ast.Attribute; import org.python.parser.ast.BinOp; import org.python.parser.ast.Call; import org.python.parser.ast.Index; import org.python.parser.ast.Name; import org.python.parser.ast.Num; import org.python.parser.ast.Subscript; import org.python.parser.ast.VisitorBase; import org.python.parser.ast.aliasType; import org.python.parser.ast.exprType; /**
 * AST support for Jython completions. This is not meant to be thorough, but
 * instead should be helpful when working with scripts. The entry points parse
 * a script with the Jython parser and return a reduced script which is built
 * from the original source lines of the statements that are accepted.
 *
 * NOTE(review): this copy of the file looks damaged and should be compared
 * with version control before it is compiled or edited further:
 * - several statements appear to have lost text, for example the loop
 *   headers in simplifyScriptToCompletions and simplifyScriptToGetCompletions
 *   and most of the If handling in simplifyScriptToGetCompletions.
 * - the type parameters of the HashSet and Set declarations may be missing.
 * - end-of-line comments share a physical line with the statements which
 *   follow them, so as laid out here those statements are commented out.
 * NOTE(review): in MyVisitorBase.traverse the Assign branch passes the Assign
 * node (sn) to trivialConstructorCall, which returns false for anything that
 * is not a Call.  The assigned value (et) was presumably intended -- confirm.
 * @author jbf * @see JythonUtil#simplifyScriptToGetParams(java.lang.String, boolean) */ public class SimplifyScriptSupport { private static final Logger logger= LoggerManager.getLogger("jython.simplify"); private static final String GETDATASET_CODE= "def getDataSet( uri, timerange='', monitor='' ):\n" + " 'return a dataset for the given URI'\n" + " return dataset(0)\n\n"; /**
 * return the script reduced by simplifyScriptToGetCompletions, starting with
 * no known variable names.  When the last line, once trimmed, ends with a
 * colon, that line is dropped before the script is parsed.
 * @param script the entire python program.
 * @return the simplified script.
 */ public static String removeSideEffects( String script ) { String[] ss= script.split("\n"); String lastLine= ss[ss.length-1].trim(); if ( lastLine.endsWith(":") ) { ss= Arrays.copyOf(ss,ss.length-1); script= JythonUtil.join( ss, "\n" ); } Module n= (Module)org.python.core.parser.parse( script, "exec" ); HashSet variableNames= new HashSet(); int ilastLine= ss.length; return simplifyScriptToGetCompletions( ss, n.body, variableNames, 1, ilastLine, 0 ); } /**
 * append the line to the result.  Routing the appends through this one
 * method is useful for debugging.
 * @param result the builder which collects the simplified script.
 * @param line the text to append; nothing is added to it.
 * @return result, so that calls can be chained.
 */ private static StringBuilder appendToResult( StringBuilder result, String line ) { result.append(line); return result; } /**
 * extracts the parts of the program that are quickly executed, generating
 * code which can be run and then queried for completions.  A script which is
 * empty after trimming is returned unchanged.  When the parser rejects the
 * script, the last line is popped off and the parse is tried again, and if
 * no attempt parses then the first PySyntaxError caught is thrown.  The
 * result is prefixed with a PWD assignment and with a stub getDataSet
 * function (GETDATASET_CODE).
 *
 * @param script the entire python program
 * @return the python program with lengthy calls removed.
 * @throws PySyntaxError when no parse attempt succeeds.
 */ public static String simplifyScriptToCompletions( String script ) throws PySyntaxError { if ( script.trim().length()==0 ) return script; String[] ss= script.split("\n"); int lastLine= ss.length; // check for continuation in last getParam call. while ( ss.length>lastLine+1 && ss[lastLine].trim().length()>0 && Character.isWhitespace( ss[lastLine].charAt(0) ) ) { lastLine++; } // Chris showed that a closing bracket or paren doesn't need to be indented. See test038/jydsCommentBug.jyds if ( lastLine0 && count>0 ) { try { n = (Module)org.python.core.parser.parse( script, "exec" ); break; } catch ( PySyntaxError ex ) { // pop off the last line and try again. 
if ( ex0==null ) ex0= ex; lastLine--; script= JythonUtil.join( Arrays.copyOf(ss,lastLine), "\n" ); count--; } } if ( n==null ) throw ex0; String s= simplifyScriptToGetCompletions( ss, n.body, variableNames, 1, lastLine, 0 ); s= GETDATASET_CODE + s; s= "PWD='file:/tmp/'\n"+s; return s; } catch ( PySyntaxError ex ) { throw ex; } } /**
 * return the simplified code for the body of one branch of an if statement,
 * by calling simplifyScriptToGetCompletions on the branch.  When nothing
 * survives, a "pass" statement is returned instead, indented like line
 * firstLine of the script, presumably so that the branch is not left empty.
 * @param ss the entire script, one line per element.
 * @param iff the If statement, which is not read by the active code.
 * @param body the statements of the branch.
 * @param variableNames variable names that have been resolved.
 * @param firstLine one-based line whose indentation is used for the "pass" statement.
 * @param lastLine1 last line passed on to simplifyScriptToGetCompletions.
 * @param depth recursion depth, for debugging.
 * @return the simplified branch, or an indented pass statement.
 */ private static String getIfBlock( String[] ss, If iff, stmtType[] body, HashSet variableNames, int firstLine, int lastLine1, int depth) { StringBuilder result= new StringBuilder(); String ss1= simplifyScriptToGetCompletions(ss, body, variableNames, firstLine, lastLine1, depth+1 ); if ( ss1.length()==0 ) { // String line; // if ( firstLine==0 && iff.beginLine>0 ) { // line= ss[body[0].beginLine-1]; // } else { // line= ss[iff.beginLine]; // } Pattern p= Pattern.compile("(\\s*)(\\S*).*"); Matcher m= p.matcher(ss[firstLine-1]); String indent; if ( m.matches() ) { indent= m.group(1); } else { indent= ""; } result.append(indent).append("pass ## SimplifyScriptSupport.getIfBlock \n"); logger.fine("things have probably gone wrong..."); } else { appendToResult( result,ss1); } return result.toString(); } /** * Extracts the parts of the program that get parameters or take a trivial amount of time to execute. * This may call itself recursively when if blocks are encountered. * See test038. * @param ss the entire script. * @param stmts statements being processed. * @param variableNames variable names that have been resolved. * @param beginLine first line of the script being processed, or -1 to use stmts[0].beginLine * @param lastLine INCLUSIVE last line of the script being processed. * @param depth recursion depth, for debugging. * @return the simplified script */ public static String simplifyScriptToGetCompletions( String[] ss, stmtType[] stmts, HashSet variableNames, int beginLine, int lastLine, int depth ) { int acceptLine= -1; // first line to accept int currentLine= beginLine; // current line we are writing (0 is first line). 
StringBuilder result= new StringBuilder(); for ( int istatement=0; istatement0 ? ss[o.beginLine-1] : "(bad line number)"; logger.log( Level.FINER, "line {0}: {1}", new Object[] { o.beginLine, theLine } ); if ( o.beginLine>0 ) { if ( beginLine<0 && istatement==0 ) acceptLine= o.beginLine; beginLine= o.beginLine; } else { acceptLine= beginLine; // elif clause in autoplot-test038/lastSuccessfulBuild/artifact/test038_demoParms1.jy } if ( beginLine>lastLine ) { continue; } if ( o instanceof org.python.parser.ast.If ) { if ( acceptLine>-1 ) { for ( int i=acceptLine; i0 && i-10 ) { if ( iff.orelse[0].beginLine>0 ) { lastLine1= iff.orelse[0].beginLine-1; // -1 is for the "else:" part. } else { if ( iff.orelse[0] instanceof If ) { lastLine1= ((If)iff.orelse[0]).test.beginLine-1; } else { logger.warning("failure to deal with another day..."); throw new RuntimeException("this case needs to be dealt with..."); } } } else if ( (istatement+1)=stmts.length ) { lastLine1= lastLine; } else { lastLine1= stmts[istatement+1].beginLine-1; } if ( iff.orelse[0].beginLine==0 ) { result.append("\n"); } else { if ( iff.orelse[0].beginLine>0 && ss[iff.orelse[0].beginLine-2].trim().startsWith("else:") ) { result.append(ss[iff.orelse[0].beginLine-2]).append("\n"); ss1= getIfBlock(ss, iff, iff.orelse, variableNames, beginLine+1, lastLine1, depth+1 ); appendToResult( result,ss1); } else { result.append(ss[iff.orelse[0].beginLine-1]).append("\n"); } } } } currentLine= lastLine1; acceptLine= -1; } else if ( o instanceof Assert ) { String m= maybeModelAssert((Assert)o,variableNames); if ( m!=null ) { result.append(m).append("\n"); currentLine= acceptLine; } } else { if ( simplifyScriptToGetCompletionsOkay( o, variableNames ) ) { if ( acceptLine<0 ) { acceptLine= (o).beginLine; for ( int i=currentLine+1; i-1 ) { int thisLine= (o).beginLine; for ( int i=acceptLine; i<=thisLine; i++ ) { if ( i0 && Character.isWhitespace(ss[i-1].charAt(0) ) ) { appendToResult(result,ss[i-1]).append("\n"); } } } 
appendToResult(result,"\n"); currentLine= thisLine; acceptLine= -1; } } } } if ( acceptLine>-1 ) { int thisLine= lastLine; for ( int i=acceptLine; i<=thisLine; i++ ) { appendToResult( result,ss[i-1]).append("\n"); } } return result.toString(); } /**
 * can we resolve this node given the variable names we know?  A Name which
 * is not in variableNames is rejected, and so is an Attribute chain whose
 * root Name is not in variableNames or which passes through a Subscript of
 * something other than an Attribute.  Otherwise the node is traversed with a
 * MyVisitorBase and the result is looksOkay || !visitNameFail.  An exception
 * thrown during the traversal is logged at SEVERE and gives false.
 * @param o the AST node to test.
 * @param variableNames the names which have been resolved so far.
 * @return true if the node can be resolved.
 */ private static boolean simplifyScriptToGetCompletionsCanResolve( SimpleNode o, HashSet variableNames ) { //if ( o.beginLine>=617 && o.beginLine<619 ) { // System.err.println( "here at 617-ish"); //} if ( o instanceof Name ) { Name c= (Name)o; if ( !variableNames.contains( c.id ) ) { logger.finest( String.format( "%04d canResolve->false: %s", o.beginLine, o.toString() ) ); return false; } } if ( o instanceof Attribute ) { Attribute at= (Attribute)o; while ( at.value instanceof Attribute || at.value instanceof Subscript ) { if ( at.value instanceof Attribute ) { at= (Attribute)at.value; } else { Subscript s= (Subscript)at.value; if ( s.value instanceof Attribute ) { at= (Attribute)s.value; } else { return false; // oh just give up... } } } if ( at.value instanceof Name ) { Name n= (Name)at.value; if ( !variableNames.contains( n.id ) ) return false; } } MyVisitorBase vb= new MyVisitorBase(variableNames); try { o.traverse(vb); logger.finest( String.format( " %04d canResolve->%s: %s", o.beginLine, vb.visitNameFail, o ) ); return vb.looksOkay || !vb.visitNameFail; } catch (Exception ex) { logger.log(Level.SEVERE, ex.getMessage(), ex); } logger.finest( String.format( "!! %04d canResolve->false: %s", o.beginLine, o ) ); return false; } /** * dumb kludge where no-arg constructor is called to get an instance for * completions. This is really an experiment... 
 * @param a * @return */ private static String maybeModelAssert( Assert a, HashSet variableNames ) { if ( a.test instanceof Call ) { org.python.parser.ast.Call cc= ( org.python.parser.ast.Call)a.test; exprType f= cc.func; if ( f instanceof Name ) { if ( ((Name)f).id.equals("isinstance") ) { if ( cc.args.length==2 ) { exprType a1= cc.args[0]; if ( a1 instanceof Name ) { exprType a2= cc.args[1]; if ( a2 instanceof Name && variableNames.contains(((Name)a2).id)) { return String.format( "%s__class=%s # inserted by maybeModelAssert", ((Name)a1).id, ((Name)a2).id ); } } } } } return null; } else { return null; } } /**
 * return true if we can include this in the script without a huge performance penalty.
 * Imports (both forms), class definitions and function definitions are
 * always accepted.  An assignment is accepted when its value passes both
 * simplifyScriptToGetCompletionsOkayNoCalls and
 * simplifyScriptToGetCompletionsCanResolve, and its targets are plain names
 * or, roughly, attributes or subscripts of names already in variableNames.
 * An If is decided by simplifyScriptToGetCompletionsOkayNoCalls.  Print and
 * all other statements are rejected.
 * @param o the statement, for example an import or an assignment
 * @param variableNames the names resolved so far.  This is modified: the
 *    names (or "as" names) of imports are added, and the Name targets of an
 *    assignment are added as they are encountered.
 * @return true if we can include this in the script without a huge performance penalty.
 */ private static boolean simplifyScriptToGetCompletionsOkay( stmtType o, HashSet variableNames ) { logger.log(Level.FINEST, "simplify script line: {0}", o.beginLine); if ( ( o instanceof org.python.parser.ast.ImportFrom ) ) { org.python.parser.ast.ImportFrom importFrom= (org.python.parser.ast.ImportFrom)o; for ( aliasType a: importFrom.names ) { if ( a.asname!=null ) { variableNames.add( a.asname ); } else { variableNames.add( a.name ); } } return true; } if ( ( o instanceof org.python.parser.ast.Import ) ) { org.python.parser.ast.Import imporrt= (org.python.parser.ast.Import)o; for ( aliasType a: imporrt.names ) { if ( a.asname!=null ) { variableNames.add( a.asname ); } else { variableNames.add( a.name ); } } return true; } if ( ( o instanceof org.python.parser.ast.ClassDef ) ) return true; if ( ( o instanceof org.python.parser.ast.FunctionDef ) ) return true; if ( ( o instanceof org.python.parser.ast.Assign ) ) { Assign a= (Assign)o; if ( simplifyScriptToGetCompletionsOkayNoCalls( a.value, variableNames ) ) { if ( !simplifyScriptToGetCompletionsCanResolve(a.value, variableNames ) ) { return false; } for (exprType target 
: a.targets) { exprType et = (exprType) target; if (et instanceof Name) { String id = ((Name) target).id; variableNames.add(id); logger.log(Level.FINEST, "assign to variable {0}", id); } else if ( et instanceof Attribute ) { Attribute at= (Attribute)et; while ( at.value instanceof Attribute || at.value instanceof Subscript ) { if ( at.value instanceof Attribute ) { at= (Attribute)at.value; } else { Subscript s= (Subscript)at.value; if ( s.value instanceof Attribute ) { at= (Attribute)s.value; } else { return false; // oh just give up... } } } if ( at.value instanceof Name ) { Name n= (Name)at.value; if ( !variableNames.contains( n.id ) ) return false; } } else if ( et instanceof Subscript ) { Subscript subscript= (Subscript)et; exprType et2= subscript.value; if ( et2 instanceof Name ) { Name n= (Name)et2; if ( variableNames.contains( n.id ) ) return true; } return false; } else { return false; } } return true; } else { return false; } } if ( ( o instanceof org.python.parser.ast.If ) ) { return simplifyScriptToGetCompletionsOkayNoCalls(o,variableNames); } if ( ( o instanceof org.python.parser.ast.Print ) ) return false; logger.log( Level.FINEST, "not okay to simplify: {0}", o); return false; } /** * inspect the node to look for function calls that are not to the function "getParam". This is awful code that * will be rewritten when we upgrade Python to 2.7. 
 * @param o * @param variableNames * @return */ private static boolean simplifyScriptToGetCompletionsOkayNoCalls( SimpleNode o, HashSet variableNames ) { if ( o instanceof Call ) { Call c= (Call)o; if ( !trivialFunctionCall(c) ) { if ( !trivialConstructorCall(c) ) { logger.finest( String.format( "%04d simplify->false: %s", o.beginLine, o.toString() ) ); return false; } } } MyVisitorBase vb= new MyVisitorBase(variableNames); try { o.traverse(vb); logger.finest( String.format( " %04d simplify->%s: %s", o.beginLine, vb.looksOkay(), o ) ); return vb.looksOkay(); } catch (Exception ex) { logger.log(Level.SEVERE, ex.getMessage(), ex); } logger.finest( String.format( "!! %04d simplify->false: %s", o.beginLine, o ) ); return false; } /**
 * there are a number of functions which take a trivial amount of time to execute and are needed for some scripts,
 * such as the string.upper() function. The commas are to guard against the id being a substring of another
 * id ("lower," does not match "lowercase").  trivialFunctionCall tests these entries, comma included, with
 * String.contains against the string form of the called expression, and okaySet holds the same names with
 * the trailing comma removed.
 * TODO: update this after Python upgrade.
 */ private static final String[] okay= new String[] { "range,", "xrange,", "irange,", "getParam,", "getDataSet,", "lower,", "upper,", "URI,", "URL,", "DatumRangeUtil,", "TimeParser,", "str,", "int,", "long,", "float,", "datum,", "datumRange,", "dataset,", "indgen,","findgen,", "dindgen,", "ones,", "zeros,", "linspace,", "logspace,", "dblarr,", "fltarr,", "strarr,", "intarr,", "bytarr,", "ripples,", "split,", "color,", "colorFromString,", "isinstance," }; private static final Set okaySet= new HashSet<>(); static { for ( String o: okay ) okaySet.add(o.substring(0,o.length()-1)); } /**
 * return the dotted name of a function expression: the id of a Name, the
 * name of an Attribute's value followed by "." and the attribute, or
 * toString() for anything else.
 * NOTE(review): the only reference I can see in this file is a
 * commented-out line in trivialFunctionCall -- confirm whether this is still needed.
 * @param t the expression which is being called.
 * @return the name, for example "a.b.c".
 */ private static String getFunctionName( exprType t ) { if ( t instanceof Name ) { return ((Name)t).id; } else if ( t instanceof Attribute ) { Attribute a= (Attribute)t; return getFunctionName(a.value)+"."+a.attr; } else { return t.toString(); } } /**
 * return true if the function call is trivial to execute and can be evaluated within a few milliseconds.
 * For example, findgen can be called because no calculations are made in the call, but fft cannot.
 * Typically these are Order 1 (a.k.a. constant time) operations, but also many Order N operations are
 * so fast they are allowed.  The test is textual: the string form of the called expression must contain
 * one of the entries of okay, or contain both "TimeUtil" and "now".
 * @param sn an AST node pointed at a Call.
 * @return true if the function call is trivial to execute, and false when sn is not a Call.
 */ private static boolean trivialFunctionCall( SimpleNode sn ) { if ( sn instanceof Call ) { Call c= (Call)sn; boolean klugdyOkay= false; String ss= c.func.toString(); // we just want "DatumRangeUtil" of the Attribute //String ss= getFunctionName(c.func); for ( String s: okay ) { if ( ss.contains(s) ) klugdyOkay= true; } if ( klugdyOkay==false ) { if ( ss.contains("TimeUtil") && ss.contains("now") ) { klugdyOkay= true; } } logger.log(Level.FINER, "trivialFunctionCall={0} for {1}", new Object[]{klugdyOkay, c.func.toString()}); return klugdyOkay; } else { return false; } } /** * return true if the function call is trivial to execute because it's a constructor, * which presumably takes little time to create. 
 * @param sn * @return true if it is a constructor call */ private static boolean trivialConstructorCall( SimpleNode sn ) { if ( sn instanceof Call ) { Call c= (Call)sn; if ( c.func instanceof Name ) { String funcName= ((Name)c.func).id; return Character.isUpperCase(funcName.charAt(0)); } else if ( c.func instanceof Attribute ) { // Rectangle.Double String funcName= ((Attribute)c.func).attr; return Character.isUpperCase(funcName.charAt(0)); } else { return false; } } else { return false; } } private static class MyVisitorBase extends VisitorBase { boolean looksOkay= true; boolean visitNameFail= false; HashSet names; MyVisitorBase( HashSet names ) { this.names= names; } @Override public Object visitName(Name node) throws Exception { logger.log(Level.FINER, "visitName({0})", node); if ( !names.contains(node.id) ) { visitNameFail= true; } return super.visitName(node); } @Override public Object visitCall(Call node) throws Exception { logger.log(Level.FINER, "visitCall({0})", node); return super.visitCall(node); } @Override protected Object unhandled_node(SimpleNode sn) throws Exception { return sn; } @Override public void traverse(SimpleNode sn) throws Exception { logger.log(Level.FINER, "traverse({0})", sn); if ( sn instanceof Call ) { looksOkay= trivialFunctionCall(sn) || trivialConstructorCall(sn); logger.log(Level.FINER, "looksOkay={0}", looksOkay); } else if ( sn instanceof Assign ) { Assign a= ((Assign)sn); exprType et= a.value; if ( et instanceof Call ) { looksOkay= trivialFunctionCall(et) || trivialConstructorCall(sn); logger.log(Level.FINER, "looksOkay={0}", looksOkay); } } else if ( sn instanceof Name ) { String t= ((Name)sn).id; if ( t.length()>1 && Character.isUpperCase( t.charAt(0) ) ) { logger.log(Level.FINER, "name is assumed to be a constructor call name: {0}", t); return; } if ( !names.contains(t) && !okaySet.contains(t)) { looksOkay= false; // TODO: why are there both looksOkay and visitNameFail? 
logger.log(Level.FINER, "looksOkay={0}", looksOkay); } } else if ( sn instanceof Attribute ) { traverse( ((Attribute)sn).value ); // DatumRangeUtil } else if ( sn instanceof Subscript ) { Subscript ss= (Subscript)sn; exprType et= ss.value; if ( et instanceof Name ) { traverse((Name)(et)); } //ss.value; //visitName((Name)) } else if ( sn instanceof BinOp ) { BinOp bo= (BinOp)sn; traverse( bo.left ); traverse( bo.right ); } else if ( sn instanceof Num ) { } else if ( sn instanceof Index ) { Index index= (Index)sn; traverse( index.value ); } else { logger.log(Level.FINE, "unchecked: {0}", sn); } } public boolean looksOkay() { return looksOkay; } /** * this contains a node whose name we can't resolve. * @return */ public boolean visitNameFail() { return visitNameFail; } } }