001/*
002 * #%L
003 * Netarchivesuite - common - test
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023/** This code is taken from http://www.incava.org/projects/java/java-diff/
024 * It was there released under the LGPL as of Jan 31. 2007. */
025package dk.netarkivet.testutils;
026
027import java.util.ArrayList;
028import java.util.Collection;
029import java.util.Comparator;
030import java.util.HashMap;
031import java.util.Iterator;
032import java.util.List;
033import java.util.ListIterator;
034import java.util.Map;
035import java.util.TreeMap;
036
037/**
038 * Compares two collections, returning a list of the additions, changes, and deletions between them. A
039 * <code>Comparator</code> may be passed as an argument to the constructor, and will thus be used. If not provided, the
040 * initial value in the <code>a</code> ("from") collection will be looked at to see if it supports the
041 * <code>Comparable</code> interface. If so, its <code>equals</code> and <code>compareTo</code> methods will be invoked
042 * on the instances in the "from" and "to" collections; otherwise, for speed, hash codes from the objects will be used
043 * instead for comparison.
044 * <p>
045 * <p>
046 * The file FileDiff.java shows an example usage of this class, in an application similar to the Unix "diff" program.
047 * </p>
048 */
049@SuppressWarnings({"rawtypes", "unchecked"})
050public class Diff {
051    /**
052     * The source array, AKA the "from" values.
053     */
054    protected Object[] a;
055
056    /**
057     * The target array, AKA the "to" values.
058     */
059    protected Object[] b;
060
061    /**
062     * The list of differences, as <code>Difference</code> instances.
063     */
064    protected List diffs = new ArrayList();
065
066    /**
067     * The pending, uncommitted difference.
068     */
069    private Difference pending;
070
071    /**
072     * The comparator used, if any.
073     */
074    private Comparator comparator;
075
076    /**
077     * The thresholds.
078     */
079    private TreeMap thresh;
080
081    /**
082     * Constructs the Diff object for the two arrays, using the given comparator.
083     */
084    public Diff(Object[] a, Object[] b, Comparator comp) {
085        this.a = a;
086        this.b = b;
087        this.comparator = comp;
088        this.thresh = null; // created in getLongestCommonSubsequences
089    }
090
091    /**
092     * Constructs the Diff object for the two arrays, using the default comparison mechanism between the objects, such
093     * as <code>equals</code> and <code>compareTo</code>.
094     */
095    public Diff(Object[] a, Object[] b) {
096        this(a, b, null);
097    }
098
099    /**
100     * Constructs the Diff object for the two collections, using the given comparator.
101     */
102    public Diff(Collection a, Collection b, Comparator comp) {
103        this(a.toArray(), b.toArray(), comp);
104    }
105
106    /**
107     * Constructs the Diff object for the two collections, using the default comparison mechanism between the objects,
108     * such as <code>equals</code> and <code>compareTo</code>.
109     */
110    public Diff(Collection a, Collection b) {
111        this(a, b, null);
112    }
113
114    /**
115     * Runs diff and returns the results.
116     */
117    public List diff() {
118        traverseSequences();
119
120        // add the last difference, if pending:
121        if (pending != null) {
122            diffs.add(pending);
123        }
124
125        return diffs;
126    }
127
128    /**
129     * Traverses the sequences, seeking the longest common subsequences, invoking the methods <code>finishedA</code>,
130     * <code>finishedB</code>, <code>onANotB</code>, and <code>onBNotA</code>.
131     */
132    protected void traverseSequences() {
133        Integer[] matches = getLongestCommonSubsequences();
134
135        int lastA = a.length - 1;
136        int lastB = b.length - 1;
137        int bi = 0;
138        int ai;
139
140        int lastMatch = matches.length - 1;
141
142        for (ai = 0; ai <= lastMatch; ++ai) {
143            Integer bLine = matches[ai];
144
145            if (bLine == null) {
146                onANotB(ai, bi);
147            } else {
148                while (bi < bLine.intValue()) {
149                    onBNotA(ai, bi++);
150                }
151
152                onMatch(ai, bi++);
153            }
154        }
155
156        boolean calledFinishA = false;
157        boolean calledFinishB = false;
158
159        while (ai <= lastA || bi <= lastB) {
160
161            // last A?
162            if (ai == lastA + 1 && bi <= lastB) {
163                if (!calledFinishA && callFinishedA()) {
164                    finishedA(lastA);
165                    calledFinishA = true;
166                } else {
167                    while (bi <= lastB) {
168                        onBNotA(ai, bi++);
169                    }
170                }
171            }
172
173            // last B?
174            if (bi == lastB + 1 && ai <= lastA) {
175                if (!calledFinishB && callFinishedB()) {
176                    finishedB(lastB);
177                    calledFinishB = true;
178                } else {
179                    while (ai <= lastA) {
180                        onANotB(ai++, bi);
181                    }
182                }
183            }
184
185            if (ai <= lastA) {
186                onANotB(ai++, bi);
187            }
188
189            if (bi <= lastB) {
190                onBNotA(ai, bi++);
191            }
192        }
193    }
194
195    /**
196     * Override and return true in order to have <code>finishedA</code> invoked at the last element in the
197     * <code>a</code> array.
198     */
199    protected boolean callFinishedA() {
200        return false;
201    }
202
203    /**
204     * Override and return true in order to have <code>finishedB</code> invoked at the last element in the
205     * <code>b</code> array.
206     */
207    protected boolean callFinishedB() {
208        return false;
209    }
210
211    /**
212     * Invoked at the last element in <code>a</code>, if <code>callFinishedA</code> returns true.
213     */
214    protected void finishedA(int lastA) {
215    }
216
217    /**
218     * Invoked at the last element in <code>b</code>, if <code>callFinishedB</code> returns true.
219     */
220    protected void finishedB(int lastB) {
221    }
222
223    /**
224     * Invoked for elements in <code>a</code> and not in <code>b</code>.
225     */
226    protected void onANotB(int ai, int bi) {
227        if (pending == null) {
228            pending = new Difference(ai, ai, bi, -1);
229        } else {
230            pending.setDeleted(ai);
231        }
232    }
233
234    /**
235     * Invoked for elements in <code>b</code> and not in <code>a</code>.
236     */
237    protected void onBNotA(int ai, int bi) {
238        if (pending == null) {
239            pending = new Difference(ai, -1, bi, bi);
240        } else {
241            pending.setAdded(bi);
242        }
243    }
244
245    /**
246     * Invoked for elements matching in <code>a</code> and <code>b</code>.
247     */
248    protected void onMatch(int ai, int bi) {
249        if (pending == null) {
250            // no current pending
251        } else {
252            diffs.add(pending);
253            pending = null;
254        }
255    }
256
257    /**
258     * Compares the two objects, using the comparator provided with the constructor, if any.
259     */
260    protected boolean equals(Object x, Object y) {
261        return comparator == null ? x.equals(y) : comparator.compare(x, y) == 0;
262    }
263
264    /**
265     * Returns an array of the longest common subsequences.
266     */
267    public Integer[] getLongestCommonSubsequences() {
268        int aStart = 0;
269        int aEnd = a.length - 1;
270
271        int bStart = 0;
272        int bEnd = b.length - 1;
273
274        TreeMap matches = new TreeMap();
275
276        while (aStart <= aEnd && bStart <= bEnd && equals(a[aStart], b[bStart])) {
277            matches.put(Integer.valueOf(aStart++), Integer.valueOf(bStart++));
278        }
279
280        while (aStart <= aEnd && bStart <= bEnd && equals(a[aEnd], b[bEnd])) {
281            matches.put(Integer.valueOf(aEnd--), Integer.valueOf(bEnd--));
282        }
283
284        Map bMatches = null;
285        if (comparator == null) {
286            if (a.length > 0 && a[0] instanceof Comparable) {
287                // this uses the Comparable interface
288                bMatches = new TreeMap();
289            } else {
290                // this just uses hashCode()
291                bMatches = new HashMap();
292            }
293        } else {
294            // we don't really want them sorted, but this is the only Map
295            // implementation (as of JDK 1.4) that takes a comparator.
296            bMatches = new TreeMap(comparator);
297        }
298
299        for (int bi = bStart; bi <= bEnd; ++bi) {
300            Object element = b[bi];
301            Object key = element;
302            List positions = (List) bMatches.get(key);
303            if (positions == null) {
304                positions = new ArrayList();
305                bMatches.put(key, positions);
306            }
307            positions.add(Integer.valueOf(bi));
308        }
309
310        thresh = new TreeMap();
311        Map links = new HashMap();
312
313        for (int i = aStart; i <= aEnd; ++i) {
314            Object aElement = a[i]; // keygen here.
315            List positions = (List) bMatches.get(aElement);
316
317            if (positions != null) {
318                Integer k = Integer.valueOf(0);
319                ListIterator pit = positions.listIterator(positions.size());
320                while (pit.hasPrevious()) {
321                    Integer j = (Integer) pit.previous();
322
323                    k = insert(j, k);
324
325                    if (k == null) {
326                        // nothing
327                    } else {
328                        Object value = k.intValue() > 0 ? links.get(Integer.valueOf(k.intValue() - 1)) : null;
329                        links.put(k, new Object[] {value, Integer.valueOf(i), j});
330                    }
331                }
332            }
333        }
334
335        if (thresh.size() > 0) {
336            Integer ti = (Integer) thresh.lastKey();
337            Object[] link = (Object[]) links.get(ti);
338            while (link != null) {
339                Integer x = (Integer) link[1];
340                Integer y = (Integer) link[2];
341                matches.put(x, y);
342                link = (Object[]) link[0];
343            }
344        }
345
346        return toArray(matches);
347    }
348
349    /**
350     * Converts the map (indexed by java.lang.Integers) into an array.
351     */
352    protected static Integer[] toArray(TreeMap map) {
353        int size = map.size() == 0 ? 0 : 1 + ((Integer) map.lastKey()).intValue();
354        Integer[] ary = new Integer[size];
355        Iterator it = map.keySet().iterator();
356
357        while (it.hasNext()) {
358            Integer idx = (Integer) it.next();
359            Integer val = (Integer) map.get(idx);
360            ary[idx.intValue()] = val;
361        }
362        return ary;
363    }
364
365    /**
366     * Returns whether the integer is not zero (including if it is not null).
367     */
368    protected static boolean isNonzero(Integer i) {
369        return i != null && i.intValue() != 0;
370    }
371
372    /**
373     * Returns whether the value in the map for the given index is greater than the given value.
374     */
375    protected boolean isGreaterThan(Integer index, Integer val) {
376        Integer lhs = (Integer) thresh.get(index);
377        return lhs != null && val != null && lhs.compareTo(val) > 0;
378    }
379
380    /**
381     * Returns whether the value in the map for the given index is less than the given value.
382     */
383    protected boolean isLessThan(Integer index, Integer val) {
384        Integer lhs = (Integer) thresh.get(index);
385        return lhs != null && (val == null || lhs.compareTo(val) < 0);
386    }
387
388    /**
389     * Returns the value for the greatest key in the map.
390     */
391    protected Integer getLastValue() {
392        return (Integer) thresh.get(thresh.lastKey());
393    }
394
395    /**
396     * Adds the given value to the "end" of the threshold map, that is, with the greatest index/key.
397     */
398    protected void append(Integer value) {
399        Integer addIdx = null;
400        if (thresh.size() == 0) {
401            addIdx = Integer.valueOf(0);
402        } else {
403            Integer lastKey = (Integer) thresh.lastKey();
404            addIdx = Integer.valueOf(lastKey.intValue() + 1);
405        }
406        thresh.put(addIdx, value);
407    }
408
409    /**
410     * Inserts the given values into the threshold map.
411     */
412    protected Integer insert(Integer j, Integer k) {
413        if (isNonzero(k) && isGreaterThan(k, j) && isLessThan(Integer.valueOf(k.intValue() - 1), j)) {
414            thresh.put(k, j);
415        } else {
416            int hi = -1;
417
418            if (isNonzero(k)) {
419                hi = k.intValue();
420            } else if (thresh.size() > 0) {
421                hi = ((Integer) thresh.lastKey()).intValue();
422            }
423
424            // off the end?
425            if (hi == -1 || j.compareTo(getLastValue()) > 0) {
426                append(j);
427                k = Integer.valueOf(hi + 1);
428            } else {
429                // binary search for insertion point:
430                int lo = 0;
431
432                while (lo <= hi) {
433                    int index = (hi + lo) / 2;
434                    Integer val = (Integer) thresh.get(Integer.valueOf(index));
435                    int cmp = j.compareTo(val);
436
437                    if (cmp == 0) {
438                        return null;
439                    } else if (cmp > 0) {
440                        lo = index + 1;
441                    } else {
442                        hi = index - 1;
443                    }
444                }
445
446                thresh.put(Integer.valueOf(lo), j);
447                k = Integer.valueOf(lo);
448            }
449        }
450
451        return k;
452    }
453
454}