/* * Copyright 2009-2017 java-diff-utils. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.github.difflib.text; import com.github.difflib.DiffUtils; import com.github.difflib.patch.AbstractDelta; import com.github.difflib.patch.ChangeDelta; import com.github.difflib.patch.Chunk; import com.github.difflib.patch.DeleteDelta; import com.github.difflib.patch.DeltaType; import com.github.difflib.patch.InsertDelta; import com.github.difflib.patch.Patch; import com.github.difflib.text.DiffRow.Tag; import java.util.*; import java.util.function.BiFunction; import java.util.function.BiPredicate; import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import static java.util.stream.Collectors.toList; /** * This class for generating DiffRows for side-by-sidy view. You can customize * the way of generating. For example, show inline diffs on not, ignoring white * spaces or/and blank lines and so on. All parameters for generating are * optional. If you do not specify them, the class will use the default values. * * These values are: showInlineDiffs = false; ignoreWhiteSpaces = true; * ignoreBlankLines = true; ... * * For instantiating the DiffRowGenerator you should use the its builder. Like * in example * DiffRowGenerator generator = new DiffRowGenerator.Builder().showInlineDiffs(true). * ignoreWhiteSpaces(true).columnWidth(100).build(); * */ public final class DiffRowGenerator { public static final BiPredicate DEFAULT_EQUALIZER = Object::equals; public static final BiPredicate IGNORE_WHITESPACE_EQUALIZER = (original, revised) -> adjustWhitespace(original).equals(adjustWhitespace(revised)); public static final Function LINE_NORMALIZER_FOR_HTML = StringUtils::normalize; /** * Splitting lines by character to achieve char by char diff checking. */ public static final Function> SPLITTER_BY_CHARACTER = line -> { List list = new ArrayList<>(line.length()); for (Character character : line.toCharArray()) { list.add(character.toString()); } return list; }; public static final Pattern SPLIT_BY_WORD_PATTERN = Pattern.compile("\\s+|[,.\\[\\](){}/\\\\*+\\-#]"); /** * Splitting lines by word to achieve word by word diff checking. */ public static final Function> SPLITTER_BY_WORD = line -> splitStringPreserveDelimiter(line, SPLIT_BY_WORD_PATTERN); public static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+"); public static Builder create() { return new Builder(); } private static String adjustWhitespace(String raw) { return WHITESPACE_PATTERN.matcher(raw.trim()).replaceAll(" "); } protected final static List splitStringPreserveDelimiter(String str, Pattern SPLIT_PATTERN) { List list = new ArrayList<>(); if (str != null) { Matcher matcher = SPLIT_PATTERN.matcher(str); int pos = 0; while (matcher.find()) { if (pos < matcher.start()) { list.add(str.substring(pos, matcher.start())); } list.add(matcher.group()); pos = matcher.end(); } if (pos < str.length()) { list.add(str.substring(pos)); } } return list; } /** * Wrap the elements in the sequence with the given tag * * @param startPosition the position from which tag should start. The * counting start from a zero. * @param endPosition the position before which tag should should be closed. * @param tagGenerator the tag generator */ static void wrapInTag(List sequence, int startPosition, int endPosition, Tag tag, BiFunction tagGenerator, Function processDiffs, boolean replaceLinefeedWithSpace) { int endPos = endPosition; while (endPos >= startPosition) { //search position for end tag while (endPos > startPosition) { if (!"\n".equals(sequence.get(endPos - 1))) { break; } else if (replaceLinefeedWithSpace) { sequence.set(endPos - 1, " "); break; } endPos--; } if (endPos == startPosition) { break; } sequence.add(endPos, tagGenerator.apply(tag, false)); if (processDiffs != null) { sequence.set(endPos - 1, processDiffs.apply(sequence.get(endPos - 1))); } endPos--; //search position for end tag while (endPos > startPosition) { if ("\n".equals(sequence.get(endPos - 1))) { if (replaceLinefeedWithSpace) { sequence.set(endPos - 1, " "); } else { break; } } if (processDiffs != null) { sequence.set(endPos - 1, processDiffs.apply(sequence.get(endPos - 1))); } endPos--; } sequence.add(endPos, tagGenerator.apply(tag, true)); endPos--; } } private final int columnWidth; private final BiPredicate equalizer; private final boolean ignoreWhiteSpaces; private final Function> inlineDiffSplitter; private final boolean mergeOriginalRevised; private final BiFunction newTag; private final BiFunction oldTag; private final boolean reportLinesUnchanged; private final Function lineNormalizer; private final Function processDiffs; private final boolean showInlineDiffs; private final boolean replaceOriginalLinefeedInChangesWithSpaces; private DiffRowGenerator(Builder builder) { showInlineDiffs = builder.showInlineDiffs; ignoreWhiteSpaces = builder.ignoreWhiteSpaces; oldTag = builder.oldTag; newTag = builder.newTag; columnWidth = builder.columnWidth; mergeOriginalRevised = builder.mergeOriginalRevised; inlineDiffSplitter = builder.inlineDiffSplitter; if (builder.equalizer != null) { equalizer = builder.equalizer; } else { equalizer = ignoreWhiteSpaces ? IGNORE_WHITESPACE_EQUALIZER : DEFAULT_EQUALIZER; } reportLinesUnchanged = builder.reportLinesUnchanged; lineNormalizer = builder.lineNormalizer; processDiffs = builder.processDiffs; replaceOriginalLinefeedInChangesWithSpaces = builder.replaceOriginalLinefeedInChangesWithSpaces; Objects.requireNonNull(inlineDiffSplitter); Objects.requireNonNull(lineNormalizer); } /** * Get the DiffRows describing the difference between original and revised * texts using the given patch. Useful for displaying side-by-side diff. * * @param original the original text * @param revised the revised text * @return the DiffRows between original and revised texts */ public List generateDiffRows(List original, List revised) { return generateDiffRows(original, DiffUtils.diff(original, revised, equalizer)); } /** * Generates the DiffRows describing the difference between original and * revised texts using the given patch. Useful for displaying side-by-side * diff. * * @param original the original text * @param patch the given patch * @return the DiffRows between original and revised texts */ public List generateDiffRows(final List original, Patch patch) { List diffRows = new ArrayList<>(); int endPos = 0; final List> deltaList = patch.getDeltas(); for (AbstractDelta originalDelta : deltaList) { for (AbstractDelta delta : decompressDeltas(originalDelta)) { endPos = transformDeltaIntoDiffRow(original, endPos, diffRows, delta); } } // Copy the final matching chunk if any. for (String line : original.subList(endPos, original.size())) { diffRows.add(buildDiffRow(Tag.EQUAL, line, line)); } return diffRows; } /** * Transforms one patch delta into a DiffRow object. */ private int transformDeltaIntoDiffRow(final List original, int endPos, List diffRows, AbstractDelta delta) { Chunk orig = delta.getSource(); Chunk rev = delta.getTarget(); for (String line : original.subList(endPos, orig.getPosition())) { diffRows.add(buildDiffRow(Tag.EQUAL, line, line)); } switch (delta.getType()) { case INSERT: for (String line : rev.getLines()) { diffRows.add(buildDiffRow(Tag.INSERT, "", line)); } break; case DELETE: for (String line : orig.getLines()) { diffRows.add(buildDiffRow(Tag.DELETE, line, "")); } break; default: if (showInlineDiffs) { diffRows.addAll(generateInlineDiffs(delta)); } else { for (int j = 0; j < Math.max(orig.size(), rev.size()); j++) { diffRows.add(buildDiffRow(Tag.CHANGE, orig.getLines().size() > j ? orig.getLines().get(j) : "", rev.getLines().size() > j ? rev.getLines().get(j) : "")); } } } return orig.last() + 1; } /** * Decompresses ChangeDeltas with different source and target size to a * ChangeDelta with same size and a following InsertDelta or DeleteDelta. * With this problems of building DiffRows getting smaller. * * @param deltaList */ private List> decompressDeltas(AbstractDelta delta) { if (delta.getType() == DeltaType.CHANGE && delta.getSource().size() != delta.getTarget().size()) { List> deltas = new ArrayList<>(); //System.out.println("decompress this " + delta); int minSize = Math.min(delta.getSource().size(), delta.getTarget().size()); Chunk orig = delta.getSource(); Chunk rev = delta.getTarget(); deltas.add(new ChangeDelta( new Chunk<>(orig.getPosition(), orig.getLines().subList(0, minSize)), new Chunk<>(rev.getPosition(), rev.getLines().subList(0, minSize)))); if (orig.getLines().size() < rev.getLines().size()) { deltas.add(new InsertDelta( new Chunk<>(orig.getPosition() + minSize, Collections.emptyList()), new Chunk<>(rev.getPosition() + minSize, rev.getLines().subList(minSize, rev.getLines().size())))); } else { deltas.add(new DeleteDelta( new Chunk<>(orig.getPosition() + minSize, orig.getLines().subList(minSize, orig.getLines().size())), new Chunk<>(rev.getPosition() + minSize, Collections.emptyList()))); } return deltas; } return Collections.singletonList(delta); } private DiffRow buildDiffRow(Tag type, String orgline, String newline) { if (reportLinesUnchanged) { return new DiffRow(type, orgline, newline); } else { String wrapOrg = preprocessLine(orgline); if (Tag.DELETE == type) { if (mergeOriginalRevised || showInlineDiffs) { wrapOrg = oldTag.apply(type, true) + wrapOrg + oldTag.apply(type, false); } } String wrapNew = preprocessLine(newline); if (Tag.INSERT == type) { if (mergeOriginalRevised) { wrapOrg = newTag.apply(type, true) + wrapNew + newTag.apply(type, false); } else if (showInlineDiffs) { wrapNew = newTag.apply(type, true) + wrapNew + newTag.apply(type, false); } } return new DiffRow(type, wrapOrg, wrapNew); } } private DiffRow buildDiffRowWithoutNormalizing(Tag type, String orgline, String newline) { return new DiffRow(type, StringUtils.wrapText(orgline, columnWidth), StringUtils.wrapText(newline, columnWidth)); } List normalizeLines(List list) { return reportLinesUnchanged ? list : list.stream() .map(lineNormalizer::apply) .collect(toList()); } /** * Add the inline diffs for given delta * * @param delta the given delta */ private List generateInlineDiffs(AbstractDelta delta) { List orig = normalizeLines(delta.getSource().getLines()); List rev = normalizeLines(delta.getTarget().getLines()); List origList; List revList; String joinedOrig = String.join("\n", orig); String joinedRev = String.join("\n", rev); origList = inlineDiffSplitter.apply(joinedOrig); revList = inlineDiffSplitter.apply(joinedRev); List> inlineDeltas = DiffUtils.diff(origList, revList, equalizer).getDeltas(); Collections.reverse(inlineDeltas); for (AbstractDelta inlineDelta : inlineDeltas) { Chunk inlineOrig = inlineDelta.getSource(); Chunk inlineRev = inlineDelta.getTarget(); if (inlineDelta.getType() == DeltaType.DELETE) { wrapInTag(origList, inlineOrig.getPosition(), inlineOrig .getPosition() + inlineOrig.size(), Tag.DELETE, oldTag, processDiffs, replaceOriginalLinefeedInChangesWithSpaces && mergeOriginalRevised); } else if (inlineDelta.getType() == DeltaType.INSERT) { if (mergeOriginalRevised) { origList.addAll(inlineOrig.getPosition(), revList.subList(inlineRev.getPosition(), inlineRev.getPosition() + inlineRev.size())); wrapInTag(origList, inlineOrig.getPosition(), inlineOrig.getPosition() + inlineRev.size(), Tag.INSERT, newTag, processDiffs, false); } else { wrapInTag(revList, inlineRev.getPosition(), inlineRev.getPosition() + inlineRev.size(), Tag.INSERT, newTag, processDiffs, false); } } else if (inlineDelta.getType() == DeltaType.CHANGE) { if (mergeOriginalRevised) { origList.addAll(inlineOrig.getPosition() + inlineOrig.size(), revList.subList(inlineRev.getPosition(), inlineRev.getPosition() + inlineRev.size())); wrapInTag(origList, inlineOrig.getPosition() + inlineOrig.size(), inlineOrig.getPosition() + inlineOrig.size() + inlineRev.size(), Tag.CHANGE, newTag, processDiffs, false); } else { wrapInTag(revList, inlineRev.getPosition(), inlineRev.getPosition() + inlineRev.size(), Tag.CHANGE, newTag, processDiffs, false); } wrapInTag(origList, inlineOrig.getPosition(), inlineOrig.getPosition() + inlineOrig.size(), Tag.CHANGE, oldTag, processDiffs, replaceOriginalLinefeedInChangesWithSpaces && mergeOriginalRevised); } } StringBuilder origResult = new StringBuilder(); StringBuilder revResult = new StringBuilder(); for (String character : origList) { origResult.append(character); } for (String character : revList) { revResult.append(character); } List original = Arrays.asList(origResult.toString().split("\n")); List revised = Arrays.asList(revResult.toString().split("\n")); List diffRows = new ArrayList<>(); for (int j = 0; j < Math.max(original.size(), revised.size()); j++) { diffRows. add(buildDiffRowWithoutNormalizing(Tag.CHANGE, original.size() > j ? original.get(j) : "", revised.size() > j ? revised.get(j) : "")); } return diffRows; } private String preprocessLine(String line) { if (columnWidth == 0) { return lineNormalizer.apply(line); } else { return StringUtils.wrapText(lineNormalizer.apply(line), columnWidth); } } /** * This class used for building the DiffRowGenerator. * * @author dmitry * */ public static class Builder { private boolean showInlineDiffs = false; private boolean ignoreWhiteSpaces = false; private BiFunction oldTag = (tag, f) -> f ? "" : ""; private BiFunction newTag = (tag, f) -> f ? "" : ""; private int columnWidth = 0; private boolean mergeOriginalRevised = false; private boolean reportLinesUnchanged = false; private Function> inlineDiffSplitter = SPLITTER_BY_CHARACTER; private Function lineNormalizer = LINE_NORMALIZER_FOR_HTML; private Function processDiffs = null; private BiPredicate equalizer = null; private boolean replaceOriginalLinefeedInChangesWithSpaces = false; private Builder() { } /** * Show inline diffs in generating diff rows or not. * * @param val the value to set. Default: false. * @return builder with configured showInlineDiff parameter */ public Builder showInlineDiffs(boolean val) { showInlineDiffs = val; return this; } /** * Ignore white spaces in generating diff rows or not. * * @param val the value to set. Default: true. * @return builder with configured ignoreWhiteSpaces parameter */ public Builder ignoreWhiteSpaces(boolean val) { ignoreWhiteSpaces = val; return this; } /** * Give the originial old and new text lines to Diffrow without any * additional processing and without any tags to highlight the change. * * @param val the value to set. Default: false. * @return builder with configured reportLinesUnWrapped parameter */ public Builder reportLinesUnchanged(final boolean val) { reportLinesUnchanged = val; return this; } /** * Generator for Old-Text-Tags. * * @param generator the tag generator * @return builder with configured ignoreBlankLines parameter */ public Builder oldTag(BiFunction generator) { this.oldTag = generator; return this; } /** * Generator for Old-Text-Tags. * * @param generator the tag generator * @return builder with configured ignoreBlankLines parameter */ public Builder oldTag(Function generator) { this.oldTag = (tag, f) -> generator.apply(f); return this; } /** * Generator for New-Text-Tags. * * @param generator * @return */ public Builder newTag(BiFunction generator) { this.newTag = generator; return this; } /** * Generator for New-Text-Tags. * * @param generator * @return */ public Builder newTag(Function generator) { this.newTag = (tag, f) -> generator.apply(f); return this; } /** * Processor for diffed text parts. Here e.g. whitecharacters could be * replaced by something visible. * * @param processDiffs * @return */ public Builder processDiffs(Function processDiffs) { this.processDiffs = processDiffs; return this; } /** * Set the column width of generated lines of original and revised * texts. * * @param width the width to set. Making it < 0 doesn't make any sense. Default 80. * @return builder with config of column width */ public Builder columnWidth(int width) { if (width >= 0) { columnWidth = width; } return this; } /** * Build the DiffRowGenerator. If some parameters is not set, the * default values are used. * * @return the customized DiffRowGenerator */ public DiffRowGenerator build() { return new DiffRowGenerator(this); } /** * Merge the complete result within the original text. This makes sense * for one line display. * * @param mergeOriginalRevised * @return */ public Builder mergeOriginalRevised(boolean mergeOriginalRevised) { this.mergeOriginalRevised = mergeOriginalRevised; return this; } /** * Per default each character is separatly processed. This variant * introduces processing by word, which does not deliver in word * changes. Therefore the whole word will be tagged as changed: * *
         * false:    (aBa : aba) --  changed: a(B)a : a(b)a
         * true:     (aBa : aba) --  changed: (aBa) : (aba)
         * 
*/ public Builder inlineDiffByWord(boolean inlineDiffByWord) { inlineDiffSplitter = inlineDiffByWord ? SPLITTER_BY_WORD : SPLITTER_BY_CHARACTER; return this; } /** * To provide some customized splitting a splitter can be provided. Here * someone could think about sentence splitter, comma splitter or stuff * like that. * * @param inlineDiffSplitter * @return */ public Builder inlineDiffBySplitter(Function> inlineDiffSplitter) { this.inlineDiffSplitter = inlineDiffSplitter; return this; } /** * By default DiffRowGenerator preprocesses lines for HTML output. Tabs * and special HTML characters like "<" are replaced with its encoded * value. To change this you can provide a customized line normalizer * here. * * @param lineNormalizer * @return */ public Builder lineNormalizer(Function lineNormalizer) { this.lineNormalizer = lineNormalizer; return this; } /** * Provide an equalizer for diff processing. * * @param equalizer equalizer for diff processing. * @return builder with configured equalizer parameter */ public Builder equalizer(BiPredicate equalizer) { this.equalizer = equalizer; return this; } /** * Sometimes it happens that a change contains multiple lines. If there * is no correspondence in old and new. To keep the merged line more * readable the linefeeds could be replaced by spaces. * * @param replace * @return */ public Builder replaceOriginalLinefeedInChangesWithSpaces(boolean replace) { this.replaceOriginalLinefeedInChangesWithSpaces = replace; return this; } } }