Monday, August 02, 2010

Turning functional code into great code: Part 3: Duplication and third-party APIs

Hello, and welcome back! This is part 3 of an ongoing series on turning functional code into great code. If you haven't read part 1 or part 2, go ahead and do so, then come on back!

Last time, we saw how to improve the StylesheetCreator class using standard Java language features and APIs. This time we'll go a little further and see what benefits certain third-party APIs can give us. But first, let's finally take a look at that StylesheetHelper class that we've heard so much about. Warning, it's long -- you may just want to skim it, then refer back to it as we're refactoring it.


package net.galluzzo.example.stylesheetbuilder.refactored.part3;

public class StylesheetHelper
{
private String ruleCondition = "";
private String childNodes = "";
private String rejectedChildNodes = "";
private String parentNodeName = "";
private String childNodeNames = "";
private String rejectNodeContents = "";

public String getStylesheetHeader( String parentNodeText,
String childNodeList )
{
parentNodeName = parentNodeText;
childNodeNames = childNodeList;
String stylesheetHeader =
"<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>"
+ "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\" xmlns:ns1=\"http://www.galluzzo.net/example/stylesheet-builder\" xmlns:java=\"net.galluzzo.example.stylesheetbuilder.Directives\" exclude-result-prefixes=\"java\">"
+ "<xsl:output method=\"xml\" indent=\"yes\" encoding=\"UTF-8\"/>"
+ "<xsl:template match=\"/\">"
+ "<" + parentNodeText + "s>"
+ "<xsl:for-each select=\"ns1:" + parentNodeText + "s/"
+ parentNodeText + "\">";

return stylesheetHeader;
}

public String getStylesheetFooter( String parentNodeText )
{
String stylesheetFooter = "</xsl:for-each></" + parentNodeText + "s>"
+ "</xsl:template></xsl:stylesheet>";

return stylesheetFooter;
}

private void setRejectedNodeContents( String nodeName, long param,
String ruleText )
{
rejectNodeContents +=
"<xsl:value-of select=\"' Rejected: '\" />"
+ "<xsl:value-of select=\"" + nodeName + "\" />"
+ "<xsl:value-of select=\"'" + ruleText + param + "'\" />";
}

private void setRejectedNodeContents( String nodeName, String param,
String ruleText )
{
rejectNodeContents += "<xsl:value-of select=\"' Rejected: '\" />"
+ "<xsl:value-of select=\"" + nodeName + "\" />"
+ "<xsl:value-of select=\"'" + ruleText + param + "'\" />";
}

private void setRejectedNodeContents( String nodeName, int max,
String ruleText, int min )
{
rejectNodeContents += "<xsl:value-of select=\"' Rejected: '\" />"
+ "<xsl:value-of select=\"" + nodeName + "\" />"
+ "<xsl:value-of select=\"'" + ruleText + min + " and " + max
+ "'\" />";
}

private void setRejectedNodeContents( String nodeName, String max,
String ruleText, String min )
{
rejectNodeContents +=
"<xsl:value-of select=\"' Rejected: '\" />"
+ "<xsl:value-of select=\"" + nodeName + "\" />"
+ "<xsl:value-of select=\"'" + ruleText + min + " and " + max
+ "'\" />";
}

private void setRuleCondition( boolean result, int ruleIndex )
{
String clause = "($result" + ruleIndex + "='" + result + "')";

if ( ruleCondition == "" )
{
ruleCondition = clause;
System.out.println( "ruleCondition = " + ruleCondition );
}
else
{
ruleCondition += " and " + clause;
System.out.println( "ruleCondition = " + ruleCondition );
}
}

private void setRejectedNode( String rejectedNodeText )
{
if ( !rejectedChildNodes.contains( "<" + rejectedNodeText + ">" ) )
{
rejectedChildNodes += "<" + rejectedNodeText
+ "><xsl:value-of select=\"" + rejectedNodeText + "\"/>" + "</"
+ rejectedNodeText + ">";
}
}

private void setNodeText( String nodeText )
{
System.out.println( "nodeText = " + nodeText );
if ( !childNodes.contains( "<" + nodeText + ">" )
&& !( nodeText.equals( "" ) ) )
{
childNodes += "<" + nodeText + "><xsl:value-of select=\"" + nodeText
+ "\"/>" + "</" + nodeText + ">";
}
}

private void setNodeText( String nodeText, int resultVarIndex )
{
childNodes += "<" + nodeText + "><xsl:value-of select=\"$result"
+ resultVarIndex + "\"/>" + "</" + nodeText + ">";
}

public String getXsltChooseStart()
{
String strChooseStart = "<xsl:choose><xsl:when test = \""
+ ruleCondition + "\">";
System.out.println( "ruleCondition = " + ruleCondition );

return strChooseStart;
}

public String getXsltWhenEnd()
{
String strXsltWhenEnd = "</xsl:when>";

return strXsltWhenEnd;
}

public String getXsltChooseEnd()
{
String strXsltChooseEnd = "</xsl:otherwise></xsl:choose>";

return strXsltChooseEnd;
}

public String getXsltOtherwiseStart()
{
String strXsltOtherwiseStart = "<xsl:otherwise>";

return strXsltOtherwiseStart;
}

public String getParentNode()
{
System.out.println( "childNodeNames = " + childNodeNames );
if ( !childNodeNames.equals( "" ) )
{
prepareChildNodes( childNodeNames );
}
String strBodyText = "<" + parentNodeName + ">" + childNodes + "</"
+ parentNodeName + ">";

return strBodyText;
}

public String getRejectedParentNode()
{
prepareRejectedChildNodes( childNodeNames );
String strBodyText = "<" + parentNodeName + "_Rejected" + ">"
+ rejectedChildNodes + "<Reject>" + rejectNodeContents
+ "</Reject></" + parentNodeName + "_Rejected" + ">";

return strBodyText;
}

private void prepareChildNodes( String childNodeNameList )
{
System.out.println( "Inside prepareChildNodes" );
String[] strList = childNodeNameList.split( "," );
for ( int i = 0; i < strList.length; i++ )
{
setNodeText( strList[i] );
}
}

private void prepareRejectedChildNodes( String childNodeNameList )
{
System.out.println( "Inside prepareRejectedChildNodes" );
String[] strList = childNodeNameList.split( "," );
for ( int i = 0; i < strList.length; i++ )
{
setRejectedNode( strList[i] );
}
}

// Rule methods

public String concatRule( int ruleIndex, String varNodeName,
String param1 )
{
System.out.println( "In concatRule" );
if ( ruleCondition == "" )
{
ruleCondition = "1";
}

String concatText = "<xsl:variable name=\"var" + ruleIndex +
"\" select=\"" + varNodeName + "\"/>"
+ "<xsl:variable name=\"result" + ruleIndex
+ "\" select=\" java:concat($var" + ruleIndex + ",'" + param1
+ "')\"/>";

System.out.println( "concatText = " + concatText );

setNodeText( varNodeName, ruleIndex );
setRejectedNode( varNodeName );
return concatText;
}

public String replaceRule( int ruleIndex, String oldValue,
String varNodeName, String newValue )
{
if ( ruleCondition == "" )
{
ruleCondition = "1";
}

String concatText = "<xsl:variable name=\"var" + ruleIndex
+ "\" select=\"" + varNodeName + "\"/>"
+ "<xsl:variable name=\"result" + ruleIndex
+ "\" select=\" java:replace('" + oldValue + "'," + "$var"
+ ruleIndex + ",'" + newValue + "')\"/>";

setNodeText( varNodeName, ruleIndex );
setRejectedNode( varNodeName );
return concatText;
}

public String greaterThanRule( int ruleIndex, String varNodeName,
long param1 )
{
String greaterThanText =
"<xsl:variable name=\"var" + ruleIndex + "\" select=\""
+ "number(" + varNodeName + ")\"/>"
+ "<xsl:variable name=\"result" + ruleIndex
+ "\" select=\" java:greaterThan($var" + ruleIndex + "," + param1
+ ")\"/>";
String errorMessage = " is not greater than ";

setRejectedNodeContents( varNodeName, param1, errorMessage );
setRuleCondition( true, ruleIndex );
setNodeText( varNodeName );
setRejectedNode( varNodeName );
return greaterThanText;
}

public String greaterThanRule( int ruleIndex, String varNodeName,
String param1 )
{
String greaterThanText =
"<xsl:variable name=\"var" + ruleIndex + "\" select=\""
+ varNodeName + "\"/>"
+ "<xsl:variable name=\"result" + ruleIndex
+ "\" select=\" java:greaterThan($var" + ruleIndex + ",'" + param1
+ "')\"/>";

setRuleCondition( true, ruleIndex );
setNodeText( varNodeName );
setRejectedNode( varNodeName );
return greaterThanText;
}

public String lessThanRule( int ruleIndex, String varNodeName,
long param1 )
{
String lessThanText =
"<xsl:variable name=\"var" + ruleIndex + "\" select=\""
+ "number(" + varNodeName + ")\"/>"
+ "<xsl:variable name=\"result" + ruleIndex
+ "\" select=\" java:lessThan($var" + ruleIndex + "," + param1
+ ")\"/>";
String errorMessage = " is not less than ";

setRejectedNodeContents( varNodeName, param1, errorMessage );
setRuleCondition( true, ruleIndex );
setNodeText( varNodeName );
setRejectedNode( varNodeName );
return lessThanText;
}

public String lessThanRule( int ruleIndex, String varNodeName,
String param1 )
{
String lessThanText =
"<xsl:variable name=\"var" + ruleIndex + "\" select=\""
+ varNodeName + "\"/>"
+ "<xsl:variable name=\"result" + ruleIndex
+ "\" select=\" java:lessThan($var" + ruleIndex + ",'" + param1
+ "')\"/>";

String errorMessage = " is not less than ";
setRejectedNodeContents( varNodeName, param1, errorMessage );
setRuleCondition( true, ruleIndex );
setNodeText( varNodeName );
setRejectedNode( varNodeName );
return lessThanText;
}

public String lengthRule( int ruleIndex, String varNodeName, int max )
{
String equalText =
"<xsl:variable name=\"var" + ruleIndex + "\" select=\""
+ varNodeName + "\"/>"
+ "<xsl:variable name=\"result" + ruleIndex
+ "\" select=\" java:length($var" + ruleIndex + "," + max + ")\"/>";

String errorMessage = " length is not equal to ";
setRejectedNodeContents( varNodeName, max, errorMessage );
setRuleCondition( true, ruleIndex );
setNodeText( varNodeName );
setRejectedNode( varNodeName );
return equalText;
}

public String betweenRule( int ruleIndex, int min, String varNodeName,
int max )
{
String betweenText =
"<xsl:variable name=\"var" + ruleIndex + "\" select=\""
+ varNodeName + "\"/>"
+ "<xsl:variable name=\"result" + ruleIndex
+ "\" select=\" java:between(" + min + "," + "$var" + ruleIndex
+ "," + max + ")\"/>";

String errorMessage = " is not between ";
setRejectedNodeContents( varNodeName, max, errorMessage, min );
setRuleCondition( true, ruleIndex );
setNodeText( varNodeName );
setRejectedNode( varNodeName );
return betweenText;
}

public String betweenRule( int ruleIndex, String min, String varNodeName,
String max )
{
String betweenText =
"<xsl:variable name=\"var" + ruleIndex + "\" select=\""
+ varNodeName + "\"/>"
+ "<xsl:variable name=\"result" + ruleIndex
+ "\" select=\" java:between('" + min + "'," + "$var" + ruleIndex
+ ",'" + max + "')\"/>";

String errorMessage = " is not between ";
setRejectedNodeContents( varNodeName, max, errorMessage, min );
setRuleCondition( true, ruleIndex );
setNodeText( varNodeName );
setRejectedNode( varNodeName );
return betweenText;
}
}


(Note that in the original implementation, there were several other "rule" methods; I have cut them down to the current set here for relative brevity.)

Let's take stock of this class. One good thing about it is that its methods are small and typically have meaningful names. However, it contains quite a lot of duplicated code, and there are a few method names that are misleading.

Method Names



First, we'll address the method names, since it's the easiest part (Alt-Shift-R in Eclipse to rename a method and all its references; I'm sure there are similar keystrokes in other IDEs). When a method name starts with "get" or "set", Java developers will assume that these are JavaBean-style accessor methods. Therefore, methods like setNodeText, which actually append to a string, should be renamed to something like addNodeText. Here's the full list of method renaming that I did:







OldNew
setNodeText(String)addChildNodeIfNotPresent
setNodeText(String, int)addProcessedChildNode
setRejectedNodeaddRejectedNode
setRejectedNodeContentsaddRejectedNodeContents
setRuleConditionaddRuleCondition


After renaming, we can run our unit test, and it still works, so we know we haven't broken anything.

Duplication



One of the cardinal rules of software design is Don't Repeat Yourself (DRY for short). If you have repeated code -- even small pieces of repeated code -- then not only is the code longer, but changes to one section have to be repeated in other sections. This makes it very easy to miss, especially if the person maintaining the code is not the person writing it, and therefore introduces bugs.

Admittedly, we're not really covering design issues yet, just third-party API usage (we'll get to design in part 4). However, we can introduce third-party APIs in much fewer places if we reduce the duplicated code first. So before we go any further, let's clean this up.

We'll start from the top down -- there are four addRejectedNodeContents methods now, and they're mostly the same. The first two are almost identical, and the second two are almost identical. So let's combine each pair as follows:


private void addRejectedNodeContents( String nodeName, long param,
String ruleText )
{
addRejectedNodeContents( nodeName, String.valueOf( param ), ruleText );
}

private void addRejectedNodeContents( String nodeName, String param,
String ruleText )
{
rejectNodeContents += "<xsl:value-of select=\"' Rejected: '\" />"
+ "<xsl:value-of select=\"" + nodeName + "\" />"
+ "<xsl:value-of select=\"'" + ruleText + param + "'\" />";
}

private void addRejectedNodeContents( String nodeName, int max,
String ruleText, int min )
{
addRejectedNodeContents( nodeName, String.valueOf( max ), ruleText,
String.valueOf( min ) );
}

private void addRejectedNodeContents( String nodeName, String max,
String ruleText, String min )
{
rejectNodeContents +=
"<xsl:value-of select=\"' Rejected: '\" />"
+ "<xsl:value-of select=\"" + nodeName + "\" />"
+ "<xsl:value-of select=\"'" + ruleText + min + " and " + max
+ "'\" />";
}


But there's still duplication between the second and fourth methods. If we factor out the error message construction (which I've renamed from "ruleText" to "errorMessage" to be clearer), then we can move to one method that actually constructs the XSLT.


private void addRejectedNodeContents( String nodeName, long param,
String errorMessage )
{
addRejectedNodeContents( nodeName, String.valueOf( param ),
errorMessage );
}

private void addRejectedNodeContents( String nodeName, String param,
String errorMessage )
{
addRejectedNodeContents( nodeName, errorMessage + param );
}

private void addRejectedNodeContents( String nodeName, int max,
String errorMessage, int min )
{
addRejectedNodeContents( nodeName, String.valueOf( max ), errorMessage,
String.valueOf( min ) );
}

private void addRejectedNodeContents( String nodeName, String max,
String errorMessage, String min )
{
addRejectedNodeContents( nodeName, errorMessage + min + " and " + max );
}

private void addRejectedNodeContents( String nodeName, String errorMessage )
{
rejectNodeContents +=
"<xsl:value-of select=\"' Rejected: '\" />"
+ "<xsl:value-of select=\"" + nodeName + "\" />"
+ "<xsl:value-of select=\"'" + errorMessage + "'\" />";
}


Hooray, no duplication! If we run our unit test again, we'll see that it still works.

The rest of the duplicated code is mostly in the rule methods. From a brief analysis, we can see that the concatRule and replaceRule methods are similar to each other (string processing rules); and that the other rule methods are similar to each other (validation rules). Let's start by combining concatRule and replaceRule as follows.


public String concatRule( int ruleIndex, String varNodeName,
String param1 )
{
System.out.println( "In concatRule" );
String resultExpression =
"java:concat($var" + ruleIndex + ",'" + param1 + "')";
return stringProcessingRule( ruleIndex, varNodeName, resultExpression );
}

public String replaceRule( int ruleIndex, String oldValue,
String varNodeName, String newValue )
{
String resultExpression =
"java:replace('" + oldValue + "'," + "$var" + ruleIndex + ",'"
+ newValue + "')";
return stringProcessingRule( ruleIndex, varNodeName, resultExpression );
}

private String stringProcessingRule( int ruleIndex, String varNodeName,
String resultExpression )
{
if ( ruleCondition.equals( "" ) )
{
ruleCondition = "1";
}

String concatText = "<xsl:variable name=\"var" + ruleIndex
+ "\" select=\"" + varNodeName + "\"/>"
+ "<xsl:variable name=\"result" + ruleIndex + "\" select=\" "
+ resultExpression + ")\"/>";

System.out.println( "concatText = " + concatText );

addProcessedChildNode( varNodeName, ruleIndex );
addRejectedNode( varNodeName );
return concatText;
}


I've also replaced the following snippet:


if ( ruleCondition == "" )


with the following:


if ( ruleCondition.equals( "" ) )


In Java, one must always compare strings using the equals method, not via the == operator. In this particular case, the code will work, because the "" string will be interned by the compiler, so only one copy will be stored in the class file. But if an instance of the class is serialized, or some maintenance programmer comes along and writes code that appends the empty string to ruleCondition in some cases, or whatever, then suddenly "" will not be the same object in memory as "", and your code will have very difficult-to-find bugs in it. Furthermore, if you were comparing a static string to a ruleCondition that had already been built up dynamically, the results would never be equal. So in general, always use equals to compare strings.

Now on to the other rules. We can combine the two greaterThanRule methods as follows:


public String greaterThanRule( int ruleIndex, String varNodeName,
long param1 )
{
return greaterThanRule( ruleIndex, varNodeName,
"number(" + varNodeName + ")", String.valueOf( param1 ),
String.valueOf( param1 ) );
}

public String greaterThanRule( int ruleIndex, String varNodeName,
String param1 )
{
return greaterThanRule( ruleIndex, varNodeName, varNodeName, param1,
"'" + param1 + "'" );
}

private String greaterThanRule( int ruleIndex, String varNodeName,
String varValue, String param, String paramValue )
{
String greaterThanText =
"<xsl:variable name=\"var" + ruleIndex + "\" select=\""
+ varValue + "\"/>"
+ "<xsl:variable name=\"result" + ruleIndex
+ "\" select=\" java:greaterThan($var" + ruleIndex + ","
+ paramValue + ")\"/>";

String errorMessage = " is not greater than ";
addRejectedNodeContents( varNodeName, param, errorMessage );
addRuleCondition( true, ruleIndex );
addChildNodeIfNotPresent( varNodeName );
addRejectedNode( varNodeName );
return greaterThanText;
}


We can make similar changes for lessThanRule, and for betweenRule. The betweenRule code is shown below:


public String betweenRule( int ruleIndex, int min, String varNodeName,
int max )
{
return betweenRule( ruleIndex, String.valueOf( min ),
String.valueOf( min ), varNodeName, "number(" + varNodeName + ")",
String.valueOf( max ), String.valueOf( max ) );
}

public String betweenRule( int ruleIndex, String min, String varNodeName,
String max )
{
return betweenRule( ruleIndex, min, "'" + min + "'", varNodeName,
varNodeName, max, "'" + max + "'" );
}

private String betweenRule( int ruleIndex, String min, String minValue,
String varNodeName, String varValue, String max, String maxValue )
{
String betweenText =
"<xsl:variable name=\"var" + ruleIndex + "\" select=\""
+ varValue + "\"/>"
+ "<xsl:variable name=\"result" + ruleIndex
+ "\" select=\" java:between(" + minValue + "," + "$var" + ruleIndex
+ "," + maxValue + ")\"/>";

String errorMessage = " is not between ";
addRejectedNodeContents( varNodeName, max, errorMessage, min );
addRuleCondition( true, ruleIndex );
addChildNodeIfNotPresent( varNodeName );
addRejectedNode( varNodeName );
return betweenText;
}


You may have noticed that the generic betweenRule method now has seven parameters. This is way too many. Ideally a method should require no more than three parameters (fewer if possible); more than that tends to indicates a design problem of some sort. And indeed there is a design problem here...but we'll tackle that in a later installment.

We're getting there, but we still have four rule methods (greaterThanRule, lessThanRule, lengthRule and betweenRule) that share a lot of code. The first three are similar, since they both take a single parameter; the betweenRule is a little different, since it takes two parameters (the min and max). So let's consolidate the first three methods:


private String greaterThanRule( int ruleIndex, String varNodeName,
String varValue, String param, String paramValue )
{
return oneParameterValidationRule( ruleIndex, varNodeName, varValue,
"greaterThan", param, paramValue, " is not greater than " );
}

private String lessThanRule( int ruleIndex, String varNodeName,
String varValue, String param, String paramValue )
{
return oneParameterValidationRule( ruleIndex, varNodeName, varValue,
"lessThan", param, paramValue, " is not less than " );
}

public String lengthRule( int ruleIndex, String varNodeName, int max )
{
return oneParameterValidationRule(ruleIndex, varNodeName, varNodeName,
"length", String.valueOf( max ), String.valueOf( max ),
" length is not equal to " );
}

private String oneParameterValidationRule( int ruleIndex,
String varNodeName, String varValue, String functionName, String param,
String paramValue, String errorMessage )
{
String ruleText =
"<xsl:variable name=\"var" + ruleIndex + "\" select=\""
+ varValue + "\"/>"
+ "<xsl:variable name=\"result" + ruleIndex
+ "\" select=\" java:" + functionName + "($var" + ruleIndex + ","
+ paramValue + ")\"/>";

addRejectedNodeContents( varNodeName, errorMessage + param );
addRuleCondition( true, ruleIndex );
addChildNodeIfNotPresent( varNodeName );
addRejectedNode( varNodeName );
return ruleText;
}


And finally, we can consolidate the oneParameterValidationRule and the betweenRule into a single validationRule:


private String oneParameterValidationRule( int ruleIndex,
String varNodeName, String varValue, String functionName, String param,
String paramValue, String errorMessage )
{
String resultExpression =
"java:" + functionName + "($var" + ruleIndex + "," + paramValue
+ ")";
return validationRule( ruleIndex, varNodeName, varValue,
resultExpression, errorMessage + param );
}

private String betweenRule( int ruleIndex, String min, String minValue,
String varNodeName, String varValue, String max, String maxValue )
{
String resultExpression =
"java:between(" + minValue + "," + "$var" + ruleIndex + ","
+ maxValue + ")";
return validationRule( ruleIndex, varNodeName, varValue,
resultExpression, " is not between " + min + " and " + max );
}

private String validationRule( int ruleIndex, String varNodeName,
String varValue, String resultExpression, String errorMessage )
{
String ruleText =
"<xsl:variable name=\"var" + ruleIndex + "\" select=\""
+ varValue + "\"/>"
+ "<xsl:variable name=\"result" + ruleIndex
+ "\" select=\" " + resultExpression + "\"/>";

addRejectedNodeContents( varNodeName, errorMessage );
addRuleCondition( true, ruleIndex );
addChildNodeIfNotPresent( varNodeName );
addRejectedNode( varNodeName );
return ruleText;
}


If we wanted to be really thorough, we would combine stringProcessingRule and validationRule; but this is probably not worth the effort at this point, since the two methods are fairly different.

Now that we've consolidated a lot of these methods, we can actually remove all the addRejectedNodeContents methods that we refactored earlier except for the fifth one that contains all the guts of the code in it. None of the rest are used anymore!

However, we have one problem: by this point, the test is failing. Why is that? Well, it turns out there were originally errors in one of the betweenRule methods (which did not put the number() function around the variable in the integer case) and one of the greaterThanRule methods (which did not put the rejection error message in the output). By adding test cases and consolidating all these duplicated methods, we have exposed and eliminated bugs in the original implementation.

Language Usage



One of the changes we wanted to make to StylesheetCreator in part 2 was to pass in a List<String> for the child node names, not a comma-delimited String containing all the names. Since this alters the public API of StylesheetCreator, let's alter our test first. We'll change this:


stylesheetCreator.createStylesheet( "employee",
"name,age,salary,category,department,bonus", rules, "test.xsl" );


to this:


stylesheetCreator.createStylesheet(
"employee",
Arrays.asList( "name", "age", "salary", "category", "department",
"bonus" ),
rules,
"test.xsl" );


The only reason we use Arrays.asList is that it's a quick shortcut for making an array from a fixed list of items. Java 7 is supposed to introduce list literals, at which point one will be able to type ["name", "age", "salary"] and so forth, like many other languages.

So now that we've altered the test case, of course, it won't compile! Let's alter StylesheetCreator to suit. We'll change it from this:


public void createStylesheet( String parentNodeName, String childNodeNames,
List<List<String>> ruleList, String outputFilename ) throws Exception
{
...
// Get header and footer stylesheet content.
String stylesheetHeader = stylesheetHelper.getStylesheetHeader(
parentNodeName, childNodeNames );
...
}


to this:


public void createStylesheet( String parentNodeName,
List<String> childNodeNames, List<List<String>> ruleList,
String outputFilename ) throws Exception
{
...
// Get header and footer stylesheet content.
String stylesheetHeader = stylesheetHelper.getStylesheetHeader(
parentNodeName, childNodeNames );
...
}


And we'll change StylesheetHelper to suit:


public class StylesheetHelper
{
...
private List<String> childNodeNames = null;
...

public String getStylesheetHeader( String parentNodeName,
List<String> childNodeNames )
{
this.parentNodeName = parentNodeName;
this.childNodeNames = childNodeNames;
...
}

public String getParentNode()
{
...
if ( !childNodeNames.isEmpty() )
{
prepareChildNodes();
}
...
}

public String getRejectedParentNode()
{
prepareRejectedChildNodes();
...
}

private void prepareChildNodes()
{
...
for ( String childNodeName : childNodeNames )
{
addChildNodeIfNotPresent( childNodeName );
}
}

private void prepareRejectedChildNodes()
{
...
for ( String childNodeName : childNodeNames )
{
addRejectedNode( childNodeName );
}
}
}


Since childNodeNames is a member variable, we can remove the parameter from the prepareChildNodes and prepareRejectedChildNodes methods. We no longer have to call String.split, which, as we pointed out previously, could result in incorrect results if the strings contain commas (which they shouldn't, being XML node names). We changed:

if ( !childNodeNames.equals( "" ) )


to:

if ( !childNodeNames.isEmpty() )


And we were able to use the more readable, concise enhanced for loop.

API Usage



Where would it make sense to use third-party libraries in our code? I can think of three libraries that may be useful.


  • Logging. The current code makes heavy use of System.out.println. While this is fine for very small applications, it is unwieldy for larger ones. Logging libraries allow you to control at runtime which log statements will appear, classify log statements by severity, control the format of each log statement including timestamps and class names, output to rolling log files, issue email alerts for certain classes of errors, and so forth.

    I'll use SLF4J on top of Log4J 1.2, although there are other good solutions like Commons Logging. The JDK comes with its own logging framework, but it is not as good as Log4J. By using a wrapper library like SLF4J or Commons Logging, you can swap out the logging implementation without changing all your code.

  • String escaping utilities. At present, none of the strings that are passed into the XML are escaped, including the string parameter values. If these are entered by a malicious user from a web page, or even accidentally entered incorrectly inside code, then invalid XML could be output -- or worse, valid XML that is controlled by the user to take advantage of a vulnerability in the program. You could think of this as XSLT injection.

    To combat this, we need to escape every parameter and value that is passed into StylesheetHelper. We could craft our own escaping utilities for doing this, but it would be easier to use something like StringEscapeUtils from Commons Lang.

  • XML creation. The main focus of the code we're looking at is to output an XSLT stylesheet, which is a type of XML. Though it's fairly easy to output XML manually, as we are currently doing, there are lots of pitfalls. For example, there's the encoding issue (currently StylesheetHelper outputs ISO-8559-1, which does not support any "special" characters), and the escaping issue mentioned above. Also, even if we output perfectly valid XML, we'd have to struggle mightily to get it all indented nicely for readability.

    We could use the DOM API built into Java for this purpose, but it's pretty unwieldy. Instead I'd recommend a library like JDOM, DOM4J, or XOM. For now, I'm not going to use an XML library, because it would change quite a bit of the code, and I'm lazy. For a real production application, though, I would use one. And perhaps when the code is designed a little better, we'll go ahead and do that.



Logging



So first, let's look at logging. By convention, a separate logger is created for each class; this enables us to differentiate output by classes in the resulting log file.


import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MyClass
{
private static final Logger log = LoggerFactory.getLogger( MyClass.class );
}


It is private because no other classes should use it; static because it is shared amongst all class instances (and can be used by static methods); and final because it should never be re-initialized. We'll put a statement like this at the top of both StylesheetCreator and StylesheetHelper.

To actually log a message, there are several different methods, corresponding to different severities of error, called "categories" in Log4J:


  • fatal: indicates that a fatal error has occurred
  • error: indicates that some error (unexpected condition) has occurred; often used when an exception is thrown unexpectedly
  • warn: warn that something happened that might be problematic, but is recoverable
  • info: log a statement that may be of interest, but does not indicate a problem
  • debug: log a statement that is only useful when debugging the application
  • trace: log the entrance to or exit from a method, or other messages that are at a very fine level of detail


In that vein, let's change all the lines that look something like this:


System.out.println( "Inside createStylesheet" );


to lines like this:


log.trace( "Inside createStylesheet" );


Likewise, we'll change the debugging statements like this:


System.out.println("Rule table values: " + columnName + " " +
ruleId + " " + param1 + " " + param2 );


to statements like this:


log.debug("Rule table values: {} {} {} {}",
new Object[] { columnName, ruleId, param1, param2 } );


In the previous example, we used a trick that SLF4J enables, where each instance of {} is replaced by the corresponding parameter in the following list. Why would we want to do this instead of the following, more traditional code?


log.debug("Rule table values: " + columnName + " " + ruleId +
" " + param1 + " " + param2 );


Well, there are two reasons. When there are several parameters, it can sometimes be more readable to see the whole string at once than lots of concatenations. But the most compelling reason is that if you are printing out objects with computationally-intensive toString methods, then those methods will only be called if the log statement is actually output, instead of being called before ever calling the log.debug method.

I'll leave it as an exercise for the reader to go through all the other System.out.println statements and change them to Logger method calls. However, there is one I'd like to point out explicitly:


log.warn( "Tried to invoke unknown rule ID {}", ruleId );


This is a warning (and should perhaps even be an error), so we want to make sure that it's output at a higher severity, so that even if we turn off debug output, we still see this error message.

Speaking of turning off debugging, how do we actually configure this system? The answer is that by default, Log4J looks for a file called log4j.properties on the classpath somewhere at runtime. The log4j.properties file that I'm using looks like this:


# Set root logger level to DEBUG and its only appender to A1.
log4j.rootLogger=DEBUG, A1

# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender

# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%5p [%t] (%F:%L) - %m%n


Line 2 says that by default, our logging will be at the "DEBUG" level (i.e. we'll see all levels from debug and up, but not trace messages), and when a log statement is output, it'll go to appender A1. (An appender is just a class that takes a log statement and dumps it to some sort of output.) It's possible to output log statements to multiple appenders, but we won't bother for this example. Line 5 defines A1 as a ConsoleAppender, which as you might guess outputs its statements to the console. Lines 8 and 9 configure the ConsoleAppender by telling it what pattern to use to output log statements. This format will output the severity (5 characters), then the time, then the file and line number, then the message, and finally a newline, thus:


DEBUG [main] (StylesheetCreator.java:35) - Rule table values: age 2 18


For more information on how to configure Log4J, please see the documentation.

String Escaping



What will happen right now if we input a parameter to the greaterThanRule with the following value?


-a)'/><xsl:value-of select="java:deleteAllFiles();"/>
<xsl:variable value="dummy" select="string('


Well, our XML output will contain something like the following:


<xsl:variable name=\"var3\" select=\"category\"/>
<xsl:variable name=\"result3\" select=\" java:greaterThan($var3,'-a)'/>
<xsl:value-of select="java:deleteAllFiles();"/>
<xsl:variable value="dummy" select="string('')\"/>


If users are able to specify their own parameter values (say, via the web interface), then they can force our program to call the deleteAllFiles() function, which presumably does something bad. This is called injection, and great care needs to be taken in order to prevent this sort of thing from happening.

If we were to use an XML library like JDOM, strings like this would be escaped without us having to think about it. However, using JDOM at this stage would be difficult; so I'm going to opt for a rather low-tech solution: string escaping. We could write our own function, but I'm not going to reinvent the wheel. Instead, we'll use Commons Lang, which contains a handy StringEscapeUtils.escapeXml function that does exactly what we need.

Let's insert string escaping into the addProcessedChildNode method. Previously it looked like this:


private void addProcessedChildNode( String nodeName, int resultVarIndex )
{
childNodes += "<" + nodeName + "><xsl:value-of select=\"$result"
+ resultVarIndex + "\"/>" + "</" + nodeName + ">";
}


With the addition of StringEscapeUtils.escapeXml, it looks like this:


private void addProcessedChildNode( String nodeName, int resultVarIndex )
{
String escapedNodeName = StringEscapeUtils.escapeXml( nodeName );
childNodes += "<" + escapedNodeName + "><xsl:value-of select=\"$result"
+ resultVarIndex + "\"/>" + "</" + escapedNodeName + ">";
}


The following methods will need to be modified in the same way:


  • getStylesheetHeader

  • getStylesheetFooter

  • addRejectedNodeContents

  • addRejectedNode

  • addChildNodeIfNotPresent

  • addProcessedChildNode

  • stringProcessingRule

  • validationRule



We have to take care that we do not escape the same string twice.

Now, the test will not pass, because StringEscapeUtils also escapes single quote marks as &apos;. This is a harmless change: the XML parser will consider the single quotes and &apos; entities to be identical, although it isn't as pretty. So we can alter the test case accordingly.

There is still a way our users can inject malicious code into our files: the string parameters can still contain single quotes. These will now be escaped so that they do not break the XML, but they are not escaped to prevent XPath from processing them as the end of a string. I'm not going to fix the program to handle this right now, since it is surprisingly difficult, but it should be done in an actual production system.

And that's it! We saw how to reduce the duplication in StylesheetHelper, and introduced two third-party APIs: SLF4J (wrapping Log4J) and Commons Lang. Next time we'll start on the really fun stuff: improving the code's design! Stay tuned....

Sunday, June 27, 2010

Turning functional code into great code: Part 2: Language usage

Welcome! This post is part of a series; if you missed Part 1, go ahead and read it.

Today, we're going to focus on how the StylesheetCreator class can make better use of the Java language and its standard API. We'll leave third-party API usage and design improvements for another day, although there is always a bit of overlap among these three areas.

The first improvement is at the very end of the createStylesheet method when writing out the file.


FileOutputStream fo = new FileOutputStream( stylesheetDirectory
+ outputFilename );
PrintStream ps = new PrintStream( fo );
ps.println( xsltText );


There are two issues here:

  • The stream is never closed. It will be closed when the garbage collector cleans it up and runs the stream's finalize method, but we don't know when that is. So we could run out of open file descriptors if the method is called many times repeatedly.
  • We are using an OutputStream to write characters to a file. InputStream and OutputStream are for reading and writing byte streams; Reader and Writer are for reading and writing character streams. They also give us control over the character encoding.


So our new code looks like this:


FileWriter fw = new FileWriter(
new File( stylesheetDirectory, outputFilename ) );
try
{
fw.write( xsltText );
fw.write( "\n" );
}
finally
{
fw.close();
}


If we run the test again, it still works, so we know that our change hasn't broken anything.

Now let's go back and look at this code:


String ruleContents = "";
...
for ( int z = 0; z < ruleList.size(); z++ )
{
...
switch ( ruleId )
{
case 1: // concat
{
System.out.println( "Case 1" );
ruleContents += stylesheetHelper.concatRule( z + 1,
columnName, params );
break;
}
...
}
}


Do you notice what is happening? We are building the ruleContents string in a loop via the += operator. Since strings in Java are immutable, this will construct a brand-new string each time through the loop, by copying the contents of the old string into the new one, then appending the new rule text. Basically, in Java, the code a += b is equivalent to:


a = new StringBuffer( a ).append( b ).toString();


This is fine if we're only doing it a few times, but it can get pretty inefficient to do it over and over in a loop.

So how do we fix it? By using either StringBuffer, or (in Java 5 and up) StringBuilder instead. The difference is that StringBuffer is synchronized, meaning that you can safely call it from multiple threads at once; and StringBuilder is not. We're not creating multiple threads within the code that is building up our strings, so we'll just use StringBuilder as follows.


StringBuilder ruleContents = new StringBuilder();
...
for ( int z = 0; z < ruleList.size(); z++ )
{
...
switch ( ruleId )
{
case 1: // concat
{
System.out.println( "Case 1" );
ruleContents.append( stylesheetHelper.concatRule( z + 1,
columnName, params ) );
break;
}
...
}
}


We can modify the other cases similarly. Once again, we run our test, and it still passes. Excellent! (By the way, I'll just assume that we run the test after each subsequent modification mentioned in this post to ensure that we haven't broken anything.)

Now let's look at a somewhat odd bit of code: the params variable. Here's how it's constructed and two samples of how it's used.


String params = "";
if ( ruleId == 1 || ruleId == 2 || ruleId == 3 || ruleId == 5 )
{
params = param1;
}
else
{
params = param1 + "," + param2;
}
...
switch ( ruleId )
{
...
case 2: // greater than
{
System.out.println( "Case 2" );
if ( params.indexOf( "-" ) < 0 )
{
int paramInt = Integer.parseInt( params );
ruleContents.append( stylesheetHelper.greaterThanRule(
z + 1, columnName, paramInt ) );
}
else
{
ruleContents.append( stylesheetHelper.greaterThanRule(
z + 1, columnName, params ) );
}
break;
}
...
case 4: // replace
{
System.out.println( "Case 4" );
String paramArr[] = params.split( "," );
ruleContents.append( stylesheetHelper.replaceRule( z + 1,
paramArr[0], columnName, paramArr[1] ) );
break;
}
...
}


This looks rather suspect to me. param1 and param2 (only where applicable) are concatenated together into params separated by a comma, and then they are split apart again in some cases. This seems unnecessary and even fragile, especially if the strings themselves contain commas. Fortunately, we can entirely avoid the use of the params variable as follows.


...
switch ( ruleId )
{
...
case 2: // greater than
{
System.out.println( "Case 2" );
if ( param1.indexOf( "-" ) < 0 )
{
int paramInt = Integer.parseInt( param1 );
ruleContents.append( stylesheetHelper.greaterThanRule(
ruleNum, columnName, paramInt ) );
}
else
{
ruleContents.append( stylesheetHelper.greaterThanRule(
ruleNum, columnName, param1 ) );
}
break;
}
...
case 4: // replace
{
System.out.println( "Case 4" );
ruleContents.append( stylesheetHelper.replaceRule( z + 1,
param1, columnName, param2 ) );
break;
}
...
}


We can make similar adjustments for all the other cases.

Now let's look at the signature of the createStylesheet method:


public void createStylesheet( String parentNodeName, String childNodeNames,
List ruleList, String outputFilename ) throws Exception


When I created the test case in part 1 of this series, the hardest part was trying to determine what I needed to pass in for childNodeNames and ruleList. As it turns out, childNodeNames is a comma-separated list of XML node names, which are strings. So, if it is a list of Strings, why not just use a List<String>? Indeed we should do that, but we'll postpone that modification until part 3, when we take a look at StylesheetHelper.

So what about ruleList? The trouble is that I don't know exactly what needs to go in the list -- and in fact, Java will give you a warning when you try to compile the file, complaining that we're using a raw type (one without type parameters) instead of a parameterized type. Happily, we can figure out what type of object goes in the list by looking at the code that uses it.


for ( int z = 0; z < ruleList.size(); z++ )
{
ArrayList rule = (ArrayList) ruleList.get( z );

String columnName = (String) rule.get( 1 );
int ruleId = ( Integer.parseInt( rule.get( 2 ) + "" ) );
String param1 = (String) ( rule.get( 3 ) );
String param2 = (String) ( rule.get( 4 ) );
...
}


So, ruleList is a list of ArrayLists, each of which contains five elements. The first element (index 0) is never used; the second, fourth and fifth are strings, and the third is unknown. Since I don't have all the original code, I'm going to go out on a limb here and assume that all the elements of each inner list are strings. I don't know for sure, but it makes this example easier. So we have a list of ArrayLists of strings:


public void createStylesheet( String parentNodeName, String childNodeNames,
List<ArrayList<String>> ruleList, String outputFilename ) throws Exception


But we can make this a little better. Why should we require the inner lists to be ArrayLists? It is true that we get elements out via random access (i.e. the get method), and LinkedLists require traversing all the way from the beginning of the list to the required element. However, these inner lists only contain five elements, so the performance penalty will be minimal. So let's just use List instead of ArrayList and permit the caller to pass in any type of list. Our method signature now looks like this:


public void createStylesheet( String parentNodeName, String childNodeNames,
List<List<String>> ruleList, String outputFilename ) throws Exception


Note that this is still not an optimal solution, but since we're just showing language usage for now and not delving into design, we'll leave it like this.

So now that we have a properly parameterized list, we can make good use of it and remove all the casts from the loop:


for ( int z = 0; z < ruleList.size(); z++ )
{
List<String> rule = ruleList.get( z );

String columnName = rule.get( 1 );
int ruleId = ( Integer.parseInt( rule.get( 2 ) ) );
String param1 = rule.get( 3 );
String param2 = rule.get( 4 );
...
}


Also, for the aforementioned performance reasons, we should avoid using random access on long lists if we can -- and the outermost list of rules could theoretically be long. Happily, there's no reason in this case why we need to use random access (List.get) to get the rules out of the ruleList. In fact, Java 5 makes this super-easy for us.


for ( List<String> rule : ruleList )
{
String columnName = rule.get( 1 );
...
switch ( ruleId )
{
case 1: // concat
{
System.out.println( "Case 1" );
ruleContents.append( stylesheetHelper.concatRule( z + 1,
columnName, params );
break;
}
...
}
}


Unfortunately, the above code won't compile. We no longer have the loop index z that we were using to represent the rule number, which is passed into all the StylesheetHelper methods. As a result, we need to simulate the old z index:


int z = 0;
for ( List<String> rule : ruleList )
{
String columnName = rule.get( 1 );
...
switch ( ruleId )
{
case 1: // concat
{
System.out.println( "Case 1" );
ruleContents.append( stylesheetHelper.concatRule( z + 1,
columnName, params );
break;
}
...
}

z++;
}


If you take a look at the way that z is used throughout the code, it's always z + 1, a rule number. So let's give it a meaningful name, and initialize it to 1 so that we don't constantly have to add 1 everywhere.


int ruleNum = 1;
for ( List<String> rule : ruleList )
{
String columnName = rule.get( 1 );
int ruleId = Integer.parseInt( rule.get( 2 ) );
String param1 = rule.get( 3 );
String param2 = rule.get( 4 );
System.out.println( "Rule table values: " + columnName + " " +
ruleId + " " + param1 + " " + param2 );

switch ( ruleId )
{
case 1: // concat
{
System.out.println( "Case 1" );
ruleContents.append( stylesheetHelper.concatRule( ruleNum,
columnName, params );
break;
}
...
}

ruleNum++;
}


While we're looking at the switch statement, there are a couple of improvements that can be made. Firstly, what happens if we don't match any case? Right now, nothing. The code will just bypass the errant rule ID and keep going with the next one. However, if I were debugging this code in production, I'd want to know that someone passed an unexpected rule ID to this method! So let's add a default case. In fact, in general, one should always have a default case in a switch statement.


int ruleNum = 1;
for ( List<String> rule : ruleList )
{
String columnName = rule.get( 1 );
int ruleId = Integer.parseInt( rule.get( 2 ) );
String param1 = rule.get( 3 );
String param2 = rule.get( 4 );
System.out.println( "Rule table values: " + columnName + " " +
ruleId + " " + param1 + " " + param2 );

switch ( ruleId )
{
case 1: // concat
{
...
}
case 2: // greater than
{
...
}
...
default:
{
System.out.println( "WARNING: Tried to invoke unknown rule ID " + ruleId );
}
}

ruleNum++;
}


Now let's get rid of the "magic numbers" above (case 1, 2, 3, 4, 5, 16). We could use public static final int constants to do that, but we can do better.

Java 5 has a new feature for representing a selection of known values: enums. Unlike in C, where enums are basically just glorified integers, Java represents enums as classes, with each enum value being an instance of that class. Since each enum has an "ordinal" (integer) value, enums can be used in switch statements too.


package net.galluzzo.example.stylesheetbuilder.refactored.part2;

public enum RuleType
{
CONCAT,
GREATER_THAN,
LESS_THAN,
REPLACE,
LENGTH,
BETWEEN
}


This is a good start; but in our case, we have an existing rule ID that we want to map to a RuleType -- we don't want to use Java's default numbering scheme that starts at 0 and goes up sequentially. So instead of doing external mapping, we'll let the enumeration itself do the mapping.


package net.galluzzo.example.stylesheetbuilder.refactored.part2;

import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;

public enum RuleType
{
CONCAT( 1 ),
GREATER_THAN( 2 ),
LESS_THAN( 3 ),
REPLACE( 4 ),
LENGTH( 5 ),
BETWEEN( 16 );

private static final Map<Integer, RuleType> ruleIdToRuleTypeMap =
new HashMap<Integer, RuleType>();

private int ruleId;


static
{
for ( RuleType ruleType : EnumSet.allOf( RuleType.class ) )
{
ruleIdToRuleTypeMap.put( ruleType.getRuleId(), ruleType );
}
}

public static RuleType valueFromId( int ruleId )
{
return ruleIdToRuleTypeMap.get( ruleId );
}


private RuleType( int ruleId )
{
this.ruleId = ruleId;
}

public int getRuleId()
{
return ruleId;
}
}


The methods are all pretty self-explanatory except for the static initializer, which uses the built-in EnumSet class to loop over all the enum values in order to build a map from rule IDs to their corresponding RuleTypes. We use this map in the valueFromId method, which retrieves the RuleType instance for a particular rule ID.

So the loop in StylesheetCreator.createStylesheet now looks like this:


int ruleNum = 1;
for ( List<String> rule : ruleList )
{
String columnName = rule.get( 1 );
int ruleId = Integer.parseInt( rule.get( 2 ) );
RuleType ruleType = RuleType.valueFromId( ruleId );
String param1 = rule.get( 3 );
String param2 = rule.get( 4 );
System.out.println( "Rule table values: " + columnName + " " +
ruleId + " " + param1 + " " + param2 );

switch ( ruleType )
{
case CONCAT:
{
System.out.println( "Case 1" );
ruleContents.append( stylesheetHelper.concatRule( ruleNum,
columnName, param1 ) );
break;
}
case GREATER_THAN:
{
System.out.println( "Case 2" );
if ( param1.indexOf( "-" ) < 0 )
{
int paramInt = Integer.parseInt( param1 );
ruleContents.append( stylesheetHelper.greaterThanRule(
ruleNum, columnName, paramInt ) );
}
else
{
ruleContents.append( stylesheetHelper.greaterThanRule(
ruleNum, columnName, param1 ) );
}
break;
}
...
}

ruleNum++;
}


We replaced the switch statement with a switch over the RuleType enum values, which is more readable. In fact, we were able to ditch the comments after each case describing what it was supposed to do; they're redundant, because the enum name gives us the same information that the comment used to.

Let's go one step further by consolidating the System.out.printlns at the beginning of each case into one place, and let's make the code print out the rule type enumeration instead of the rule ID:


int ruleNum = 1;
for ( List<String> rule : ruleList )
{
String columnName = rule.get( 1 );
int ruleId = Integer.parseInt( rule.get( 2 ) );
RuleType ruleType = RuleType.valueFromId( ruleId );
String param1 = rule.get( 3 );
String param2 = rule.get( 4 );
System.out.println( "Rule table values: " + columnName + " " +
ruleId + " " + param1 + " " + param2 );

if ( ruleType != null )
{
System.out.println( "Case " + ruleType );
}

switch ( ruleType )
{
case CONCAT:
{
ruleContents.append( stylesheetHelper.concatRule( ruleNum,
columnName, param1 ) );
break;
}
case GREATER_THAN:
{
if ( param1.indexOf( "-" ) < 0 )
{
int paramInt = Integer.parseInt( param1 );
ruleContents.append( stylesheetHelper.greaterThanRule(
ruleNum, columnName, paramInt ) );
}
else
{
ruleContents.append( stylesheetHelper.greaterThanRule(
ruleNum, columnName, param1 ) );
}
break;
}
...
}

ruleNum++;
}


This enum type is pretty nifty, but we'll discuss an even better solution in another couple of posts.

That's about it for now. We've covered Writer vs. OutputStream, StringBuffer vs. StringBuilder vs. concatenation, type parameters, the enhanced for loop, and enums. Next time, we'll look beyond the standard Java runtime to third-party APIs, and take our first look at the mysterious StylesheetHelper class. Stay tuned!

Wednesday, June 23, 2010

Turning functional code into great code: Part 1: Testing

I recently came into contact with an eager young developer with a year or two of work experience, who showed me his code and asked me how to improve it. Since I've been asked this by many people over the course of many years, I thought I might be useful for other young (or not-so-young!) developers who wish to improve their code also. I plan to break this into multiple parts, providing explanations and brief justifications for those who may be new to various best practices, even basic ones. Hope you stay tuned!

Caveat: I am not the world's foremost expert in, well, much of anything, and I have formed various opinions and biases over the years. So if you're a senior developer or architect, I'm sure you'll see something here that either I missed or you disagree with. Please feel free to let me know in the comments and I'll be happy to discuss it further. Also please note that I'll only be covering testability in this part, and will gradually introduce more sophisticated improvements in future installments.

Okay, without further ado, let's get started! The code in question builds an XSLT stylesheet for validation and processing of data contained in an XML file.

StylesheetCreationAction.java

package net.galluzzo.example.stylesheetbuilder.old;

import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.struts.action.Action;
import org.apache.struts.action.ActionForm;
import org.apache.struts.action.ActionForward;
import org.apache.struts.action.ActionMapping;

public class StylesheetCreationAction extends Action
{
String stylesheetDirectory = "C:\\apache-tomcat\\webapps\\stylesheet-builder\\xslt";

public ActionForward execute( ActionMapping mapping, ActionForm form,
HttpServletRequest request, HttpServletResponse response )
{
//...

createStylesheet( parentNodeName, childNodeNames, ruleList,
outputFilename );

return new ActionForward( "success" );
}

public void createStylesheet( String parentNodeName, String childNodeNames,
List ruleList, String outputFilename ) throws Exception
{
System.out.println( "Inside createStylesheet" );

StylesheetHelper stylesheetHelper = new StylesheetHelper();

String ruleContents = "";
System.out.println( "Number of rules: " + ruleList.size() );

for ( int z = 0; z < ruleList.size(); z++ )
{
ArrayList rule = (ArrayList) ruleList.get( z );

String columnName = (String) rule.get( 1 );
int ruleId = ( Integer.parseInt( rule.get( 2 ) + "" ) );
String param1 = (String) ( rule.get( 3 ) );
String param2 = (String) ( rule.get( 4 ) );
String params = "";
if ( ruleId == 1 || ruleId == 2 || ruleId == 3 || ruleId == 5 )
{
params = param1;
}
else
{
params = param1 + "," + param2;
}
System.out.println( "Rule table values: " + columnName + " "
+ ruleId + " " + params );

switch ( ruleId )
{
case 1: // concat
{
System.out.println( "Case 1" );
ruleContents += stylesheetHelper.concatRule( z + 1,
columnName, params );
break;
}
case 2: // greater than
{
System.out.println( "Case 2" );
if ( params.indexOf( "-" ) < 0 )
{
int paramInt = Integer.parseInt( params );
ruleContents += stylesheetHelper.greaterThanRule(
z + 1, columnName, paramInt );
}
else
{
ruleContents += stylesheetHelper.greaterThanRule(
z + 1, columnName, params );
}
break;
}
// Several other cases here which we'll get to later
}
}

// Get header and footer stylesheet content.
String stylesheetHeader = stylesheetHelper.getStylesheetHeader(
parentNodeName, childNodeNames );
String stylesheetFooter = stylesheetHelper.getStylesheetFooter( parentNodeName );

// Get the beginning and ending text for the <xslt:choose> node.
String xsltChooseStart = stylesheetHelper.getXsltChooseStart();
String xsltWhenEnd = stylesheetHelper.getXsltWhenEnd();
String xsltOtherwiseStart = stylesheetHelper.getXsltOtherwiseStart();
String xsltChooseEnd = stylesheetHelper.getXsltChooseEnd();

// Get XML nodes.
String parentNode = stylesheetHelper.getParentNode();
String rejectedParentNode = stylesheetHelper.getRejectedParentNode();

// Construct the entire stylesheet.
String xsltText = stylesheetHeader + ruleContents + xsltChooseStart
+ parentNode + xsltWhenEnd + xsltOtherwiseStart
+ rejectedParentNode + xsltChooseEnd + stylesheetFooter;

// Create the output file.
FileOutputStream fo = new FileOutputStream( stylesheetDirectory
+ outputFilename );
PrintStream ps = new PrintStream( fo );
ps.println( xsltText );
}
}

We will cover the StylesheetHelper class in a future installment.

Testability


The goal of this series of blog posts is to improve this code. But how do I do that and still ensure that the refactored code still does what the old code did? By writing a unit test, then making changes, and after each change, ensuring that the test still passes.

Unit tests are programs that test a "unit" of code, usually a class, by executing various functionality (methods) on it, and programmatically ensuring that the results are correct. I will use the JUnit 4 framework in this example, for the simple reasons that I know it fairly well, and it is widely used.

The problem is that I cannot actually run the above class on my machine. You see, I am writing this blog post on Linux, and the final code inside createStylesheet writes out a file into a specific Windows directory that is hard-coded into the class. So, for the very smallest change possible, for now, I will change this directory to one that works on my machine. (This is not a good solution, but we'll get to that later.)

Furthermore, this class is a Struts action, which means that it must extend from Struts' Action base class in order to do any work. I don't have a copy of Struts 1.x handy, and frankly I should not need it in order to build an XSLT stylesheet file from a list of passed-in rules. Happily, the createStylesheet method is the one I really care about, and it only uses Java APIs, not Struts or J2EE. The creation of a stylesheet is functionality that can be reused outside of a single web action, so it should go in a non-J2EE-related class. We'll call this class StylesheetCreator, thus:


public class StylesheetCreator
{
String stylesheetDirectory = "/home/eric/workspace/stylesheet-builder/resources/output/";

public void createStylesheet( String parentNodeName, String childNodeNames,
List ruleList, String outputFilename ) throws Exception
{
System.out.println( "Inside createStylesheet" );
// The rest of the original method goes here.
}
}

With that simple change, we can now write a test case for this functionality, which looks like this:


package net.galluzzo.example.stylesheetbuilder.refactored.part1;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.junit.Test;

/**
* Test case for {@link StylesheetCreator}.
*
* @author Eric Galluzzo
*/
public class StylesheetCreatorTest
{
private static final String STYLESHEET = // ...long XML string here

@Test
public void testAllRules() throws Exception
{
StylesheetCreator stylesheetCreator = new StylesheetCreator();

List<List<String>> rules = new ArrayList<List<String>>();
rules.add( createRule( "name", 1, ", Esq.", "" ) );
rules.add( createRule( "age", 2, "18", "" ) );
rules.add( createRule( "category", 2, "-aaa", "" ) );
rules.add( createRule( "age", 3, "100", "" ) );
rules.add( createRule( "category", 3, "-zzz", "" ) );
rules.add( createRule( "department", 4, "old", "new" ) );
rules.add( createRule( "category", 5, "4", "" ) );
rules.add( createRule( "bonus", 16, "100", "5000" ) );
rules.add( createRule( "category", 16, "-aaa", "-zzz" ) );

stylesheetCreator.createStylesheet( "employee",
"name,age,salary,category,department,bonus", rules, "test.xsl" );

String stylesheetText = readFileContents(
new File( stylesheetCreator.stylesheetDirectory, "test.xsl" ) );
assertThat( stylesheetText, is( STYLESHEET ) );
}

private List<String> createRule( String columnName, int ruleId,
String param1, String param2 )
{
List<String> rule = new ArrayList<String>();
rule.add( "employee" );
rule.add( columnName );
rule.add( String.valueOf( ruleId ) );
rule.add( param1 );
rule.add( param2 );
return rule;
}

private String readFileContents( File file ) throws IOException
{
StringBuilder sb = new StringBuilder();
BufferedReader reader = new BufferedReader( new FileReader( file ) );
try
{
String line;
while ( ( line = reader.readLine() ) != null )
{
sb.append( line ).append( "\n" );
}
}
finally
{
reader.close();
}
return sb.toString();
}
}

Note that I've taken some shortcuts here: I've defined only one test, which is a positive test, covering all the different stylesheet rules. Ordinarily one would write both positive and negative tests for each individual rule, so that when a rule broke, one would immediately know exactly which rule it was. However, the all-in-one test is a little shorter, and thus easier to understand on a blog.

It is now possible to run this in one's IDE of choice and get that wonderful green bar, indicating the test has passed! So we can now begin to improve this code, which we will do in Part 2. Stay tuned!

Wednesday, June 09, 2010

Using inheritance with fluent interfaces: get this

Recently I had a situation where I needed to implement Joshua Bloch's Builder pattern (see Effective Java, item 2) over a hierarchy of Java domain objects. Similar problems would arise when building other types of fluent interface, which commonly "return this" from each method in order to support method chaining. Creating a working solution presented some interesting challenges!

Without going into too many details, the fluent Builder pattern is needed in languages without named arguments and default arguments, like Java, in order to avoid long lists of constructor parameters, or a bunch of setters on the object (which may be immutable). For example:

public class X {
protected int foo;
protected int bar;

public static class Builder {
private X x = new X();

public Builder withFoo( int foo ) {
x.foo = foo; return this;
}
public Builder withBar( int bar ) {
x.bar = bar; return this;
}
public X build() { return x; }
}

protected X() {}

public int getFoo() { return foo; }
public int getBar() { return bar; }
}

You would then use this code as follows:

X x = new X.Builder().withFoo( 1 ).withBar( 2 ).build();

Obviously, for a class with only two instance variables, this pattern doesn't make much sense -- you'd just use a constructor. But for domain objects with lots more than two instance variables, or with subclasses that may add more than two, it makes plenty of sense.

However, this becomes more challenging when dealing with objects that use inheritance. In the project I'm working on, we had a number of domain objects that were (legitimately) using inheritance. The trouble is that the builder pattern doesn't particularly handle this well as it stands.

The simplest way of creating builders for inherited classes is simply to duplicate the builder methods:

public class Y extends X {
private int baz;

public static class Builder {
private Y y = new Y();

public Builder withFoo( int foo ) {
y.foo = foo; return this;
}
public Builder withBar( int bar ) {
y.bar = bar; return this;
}
public Builder withBaz( int baz ) {
y.baz = baz; return this;
}
public Y build() { return y; }
}

protected Y() {}

public int getBaz() { return baz; }
}

But duplication is EvilTM, especially if you have lots of builder methods, or lots of subclasses. We really don't want both Builder classes to have the withFoo and withBar methods. How do we get around this?

My first thought was something along the following lines:

public abstract class X {
protected int foo;
protected int bar;

protected static class Builder<T extends X> {
private T x;

public Builder() { x = createX(); }
public Builder<T> withFoo( int foo ) {
x.foo = foo; return this;
}
public Builder<T> withBar( int bar ) {
x.bar = bar; return this;
}
public T build() { return x; }
protected abstract T createX();
}

protected X() {}

public int getFoo() { return foo; }
public int getBar() { return bar; }
}

public class Y extends X {
private int baz;

public static class Builder extends X.Builder<Y> {
public Builder withBaz( int baz ) {
y.baz = baz; return this;
}
protected Y createX() { return new Y(); }
}

protected Y() {}

public int getBaz() { return baz; }
}

The only trouble with that is that the following fails to compile:

Y y = new Y.Builder().withFoo( 1 ).withBaz( 3 ).build();

Why? Because withFoo returns a Builder<Y>, not a Y.Builder; and the withBaz method is on the latter, not the former.

So...the code I actually wrote looked like this:

public abstract class X {
protected int foo;
protected int bar;

protected static class Builder<T extends X,
B extends Builder<T, B>> {

private T obj;

public Builder() { obj = createObj(); }
public B withFoo( int foo ) {
obj.foo = foo; return this;
}
public B withBar( int bar ) {
obj.bar = bar; return this;
}
public T build() { return built; }
protected abstract T createObj();
}

protected X() {}

public int getFoo() { return foo; }
public int getBar() { return bar; }
}

public class Y extends X {
private int baz;

public static class Builder
extends X.Builder<Y, Y.Builder> {

public Builder withBaz( int baz ) {
obj.baz = baz; return this;
}
protected Y createObj() { return new Y(); }
}

protected Y() {}

public int getBaz() { return baz; }
}

Now the return types are correct...but the withFoo and withBar methods won't compile. The trouble is that this inside X.Builder<T, B> is of type X.Builder<T, B>, not of type B. Actually, at runtime, the builder class should indeed be of type B, but it would be inelegant and type-unsafe to cast this to B everywhere.

Happily, there is a solution that doesn't involve casting. This is the final version of the code:

public abstract class X {
protected int foo;
protected int bar;

protected static class Builder<T extends X,
B extends Builder<T, B>> {

private T obj;
private B thisObj;

public Builder() {
obj = createObj(); thisObj = getThis();
}
public B withFoo( int foo ) {
obj.foo = foo; return thisObj;
}
public B withBar( int bar ) {
obj.bar = bar; return thisObj;
}
public T build() { return built; }
protected abstract T createObj();
protected abstract B getThis();
}

protected X() {}

public int getFoo() { return foo; }
public int getBar() { return bar; }
}

public class Y extends X {
private int baz;

public static class Builder
extends X.Builder<Y, Y.Builder> {

public Builder withBaz( int baz ) {
obj.baz = baz; return thisObj;
}
protected Y createObj() { return new Y(); }
protected Builder getThis() { return this; }
}

protected Y() {}

public int getBaz() { return baz; }
}

We added the getThis() method, which is always implemented as return this; in each subclass. This ensures that the Builder subclass is in fact the B type parameter to X.Builder<T, B>.

So, at the price of a small wart (the getThis() method plus associated instance variable), we've got a solution that maintains type safety, removes duplication, and allows all the code completion goodness that your IDE of choice may offer. Win!

Hello!

I found this old blog in the corner collecting dust, and thought I'd take it out and give it a shine.

I'm Eric Galluzzo, a software developer/designer/architect, primarily in Java and Java-related languages. On this blog I hope to share a few tidbits that may be useful to others. Please feel free to drop by and leave a note!