001/* 002 * #%L 003 * Netarchivesuite - common - test 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023/** This code is taken from http://www.incava.org/projects/java/java-diff/ 024 * It was there released under the LGPL as of Jan 31. 2007. */ 025package dk.netarkivet.testutils; 026 027import java.util.ArrayList; 028import java.util.Collection; 029import java.util.Comparator; 030import java.util.HashMap; 031import java.util.Iterator; 032import java.util.List; 033import java.util.ListIterator; 034import java.util.Map; 035import java.util.TreeMap; 036 037/** 038 * Compares two collections, returning a list of the additions, changes, and deletions between them. A 039 * <code>Comparator</code> may be passed as an argument to the constructor, and will thus be used. If not provided, the 040 * initial value in the <code>a</code> ("from") collection will be looked at to see if it supports the 041 * <code>Comparable</code> interface. If so, its <code>equals</code> and <code>compareTo</code> methods will be invoked 042 * on the instances in the "from" and "to" collections; otherwise, for speed, hash codes from the objects will be used 043 * instead for comparison. 044 * <p> 045 * <p> 046 * The file FileDiff.java shows an example usage of this class, in an application similar to the Unix "diff" program. 047 * </p> 048 */ 049@SuppressWarnings({"rawtypes", "unchecked"}) 050public class Diff { 051 /** 052 * The source array, AKA the "from" values. 053 */ 054 protected Object[] a; 055 056 /** 057 * The target array, AKA the "to" values. 058 */ 059 protected Object[] b; 060 061 /** 062 * The list of differences, as <code>Difference</code> instances. 063 */ 064 protected List diffs = new ArrayList(); 065 066 /** 067 * The pending, uncommitted difference. 068 */ 069 private Difference pending; 070 071 /** 072 * The comparator used, if any. 073 */ 074 private Comparator comparator; 075 076 /** 077 * The thresholds. 078 */ 079 private TreeMap thresh; 080 081 /** 082 * Constructs the Diff object for the two arrays, using the given comparator. 083 */ 084 public Diff(Object[] a, Object[] b, Comparator comp) { 085 this.a = a; 086 this.b = b; 087 this.comparator = comp; 088 this.thresh = null; // created in getLongestCommonSubsequences 089 } 090 091 /** 092 * Constructs the Diff object for the two arrays, using the default comparison mechanism between the objects, such 093 * as <code>equals</code> and <code>compareTo</code>. 094 */ 095 public Diff(Object[] a, Object[] b) { 096 this(a, b, null); 097 } 098 099 /** 100 * Constructs the Diff object for the two collections, using the given comparator. 101 */ 102 public Diff(Collection a, Collection b, Comparator comp) { 103 this(a.toArray(), b.toArray(), comp); 104 } 105 106 /** 107 * Constructs the Diff object for the two collections, using the default comparison mechanism between the objects, 108 * such as <code>equals</code> and <code>compareTo</code>. 109 */ 110 public Diff(Collection a, Collection b) { 111 this(a, b, null); 112 } 113 114 /** 115 * Runs diff and returns the results. 116 */ 117 public List diff() { 118 traverseSequences(); 119 120 // add the last difference, if pending: 121 if (pending != null) { 122 diffs.add(pending); 123 } 124 125 return diffs; 126 } 127 128 /** 129 * Traverses the sequences, seeking the longest common subsequences, invoking the methods <code>finishedA</code>, 130 * <code>finishedB</code>, <code>onANotB</code>, and <code>onBNotA</code>. 131 */ 132 protected void traverseSequences() { 133 Integer[] matches = getLongestCommonSubsequences(); 134 135 int lastA = a.length - 1; 136 int lastB = b.length - 1; 137 int bi = 0; 138 int ai; 139 140 int lastMatch = matches.length - 1; 141 142 for (ai = 0; ai <= lastMatch; ++ai) { 143 Integer bLine = matches[ai]; 144 145 if (bLine == null) { 146 onANotB(ai, bi); 147 } else { 148 while (bi < bLine.intValue()) { 149 onBNotA(ai, bi++); 150 } 151 152 onMatch(ai, bi++); 153 } 154 } 155 156 boolean calledFinishA = false; 157 boolean calledFinishB = false; 158 159 while (ai <= lastA || bi <= lastB) { 160 161 // last A? 162 if (ai == lastA + 1 && bi <= lastB) { 163 if (!calledFinishA && callFinishedA()) { 164 finishedA(lastA); 165 calledFinishA = true; 166 } else { 167 while (bi <= lastB) { 168 onBNotA(ai, bi++); 169 } 170 } 171 } 172 173 // last B? 174 if (bi == lastB + 1 && ai <= lastA) { 175 if (!calledFinishB && callFinishedB()) { 176 finishedB(lastB); 177 calledFinishB = true; 178 } else { 179 while (ai <= lastA) { 180 onANotB(ai++, bi); 181 } 182 } 183 } 184 185 if (ai <= lastA) { 186 onANotB(ai++, bi); 187 } 188 189 if (bi <= lastB) { 190 onBNotA(ai, bi++); 191 } 192 } 193 } 194 195 /** 196 * Override and return true in order to have <code>finishedA</code> invoked at the last element in the 197 * <code>a</code> array. 198 */ 199 protected boolean callFinishedA() { 200 return false; 201 } 202 203 /** 204 * Override and return true in order to have <code>finishedB</code> invoked at the last element in the 205 * <code>b</code> array. 206 */ 207 protected boolean callFinishedB() { 208 return false; 209 } 210 211 /** 212 * Invoked at the last element in <code>a</code>, if <code>callFinishedA</code> returns true. 213 */ 214 protected void finishedA(int lastA) { 215 } 216 217 /** 218 * Invoked at the last element in <code>b</code>, if <code>callFinishedB</code> returns true. 219 */ 220 protected void finishedB(int lastB) { 221 } 222 223 /** 224 * Invoked for elements in <code>a</code> and not in <code>b</code>. 225 */ 226 protected void onANotB(int ai, int bi) { 227 if (pending == null) { 228 pending = new Difference(ai, ai, bi, -1); 229 } else { 230 pending.setDeleted(ai); 231 } 232 } 233 234 /** 235 * Invoked for elements in <code>b</code> and not in <code>a</code>. 236 */ 237 protected void onBNotA(int ai, int bi) { 238 if (pending == null) { 239 pending = new Difference(ai, -1, bi, bi); 240 } else { 241 pending.setAdded(bi); 242 } 243 } 244 245 /** 246 * Invoked for elements matching in <code>a</code> and <code>b</code>. 247 */ 248 protected void onMatch(int ai, int bi) { 249 if (pending == null) { 250 // no current pending 251 } else { 252 diffs.add(pending); 253 pending = null; 254 } 255 } 256 257 /** 258 * Compares the two objects, using the comparator provided with the constructor, if any. 259 */ 260 protected boolean equals(Object x, Object y) { 261 return comparator == null ? x.equals(y) : comparator.compare(x, y) == 0; 262 } 263 264 /** 265 * Returns an array of the longest common subsequences. 266 */ 267 public Integer[] getLongestCommonSubsequences() { 268 int aStart = 0; 269 int aEnd = a.length - 1; 270 271 int bStart = 0; 272 int bEnd = b.length - 1; 273 274 TreeMap matches = new TreeMap(); 275 276 while (aStart <= aEnd && bStart <= bEnd && equals(a[aStart], b[bStart])) { 277 matches.put(Integer.valueOf(aStart++), Integer.valueOf(bStart++)); 278 } 279 280 while (aStart <= aEnd && bStart <= bEnd && equals(a[aEnd], b[bEnd])) { 281 matches.put(Integer.valueOf(aEnd--), Integer.valueOf(bEnd--)); 282 } 283 284 Map bMatches = null; 285 if (comparator == null) { 286 if (a.length > 0 && a[0] instanceof Comparable) { 287 // this uses the Comparable interface 288 bMatches = new TreeMap(); 289 } else { 290 // this just uses hashCode() 291 bMatches = new HashMap(); 292 } 293 } else { 294 // we don't really want them sorted, but this is the only Map 295 // implementation (as of JDK 1.4) that takes a comparator. 296 bMatches = new TreeMap(comparator); 297 } 298 299 for (int bi = bStart; bi <= bEnd; ++bi) { 300 Object element = b[bi]; 301 Object key = element; 302 List positions = (List) bMatches.get(key); 303 if (positions == null) { 304 positions = new ArrayList(); 305 bMatches.put(key, positions); 306 } 307 positions.add(Integer.valueOf(bi)); 308 } 309 310 thresh = new TreeMap(); 311 Map links = new HashMap(); 312 313 for (int i = aStart; i <= aEnd; ++i) { 314 Object aElement = a[i]; // keygen here. 315 List positions = (List) bMatches.get(aElement); 316 317 if (positions != null) { 318 Integer k = Integer.valueOf(0); 319 ListIterator pit = positions.listIterator(positions.size()); 320 while (pit.hasPrevious()) { 321 Integer j = (Integer) pit.previous(); 322 323 k = insert(j, k); 324 325 if (k == null) { 326 // nothing 327 } else { 328 Object value = k.intValue() > 0 ? links.get(Integer.valueOf(k.intValue() - 1)) : null; 329 links.put(k, new Object[] {value, Integer.valueOf(i), j}); 330 } 331 } 332 } 333 } 334 335 if (thresh.size() > 0) { 336 Integer ti = (Integer) thresh.lastKey(); 337 Object[] link = (Object[]) links.get(ti); 338 while (link != null) { 339 Integer x = (Integer) link[1]; 340 Integer y = (Integer) link[2]; 341 matches.put(x, y); 342 link = (Object[]) link[0]; 343 } 344 } 345 346 return toArray(matches); 347 } 348 349 /** 350 * Converts the map (indexed by java.lang.Integers) into an array. 351 */ 352 protected static Integer[] toArray(TreeMap map) { 353 int size = map.size() == 0 ? 0 : 1 + ((Integer) map.lastKey()).intValue(); 354 Integer[] ary = new Integer[size]; 355 Iterator it = map.keySet().iterator(); 356 357 while (it.hasNext()) { 358 Integer idx = (Integer) it.next(); 359 Integer val = (Integer) map.get(idx); 360 ary[idx.intValue()] = val; 361 } 362 return ary; 363 } 364 365 /** 366 * Returns whether the integer is not zero (including if it is not null). 367 */ 368 protected static boolean isNonzero(Integer i) { 369 return i != null && i.intValue() != 0; 370 } 371 372 /** 373 * Returns whether the value in the map for the given index is greater than the given value. 374 */ 375 protected boolean isGreaterThan(Integer index, Integer val) { 376 Integer lhs = (Integer) thresh.get(index); 377 return lhs != null && val != null && lhs.compareTo(val) > 0; 378 } 379 380 /** 381 * Returns whether the value in the map for the given index is less than the given value. 382 */ 383 protected boolean isLessThan(Integer index, Integer val) { 384 Integer lhs = (Integer) thresh.get(index); 385 return lhs != null && (val == null || lhs.compareTo(val) < 0); 386 } 387 388 /** 389 * Returns the value for the greatest key in the map. 390 */ 391 protected Integer getLastValue() { 392 return (Integer) thresh.get(thresh.lastKey()); 393 } 394 395 /** 396 * Adds the given value to the "end" of the threshold map, that is, with the greatest index/key. 397 */ 398 protected void append(Integer value) { 399 Integer addIdx = null; 400 if (thresh.size() == 0) { 401 addIdx = Integer.valueOf(0); 402 } else { 403 Integer lastKey = (Integer) thresh.lastKey(); 404 addIdx = Integer.valueOf(lastKey.intValue() + 1); 405 } 406 thresh.put(addIdx, value); 407 } 408 409 /** 410 * Inserts the given values into the threshold map. 411 */ 412 protected Integer insert(Integer j, Integer k) { 413 if (isNonzero(k) && isGreaterThan(k, j) && isLessThan(Integer.valueOf(k.intValue() - 1), j)) { 414 thresh.put(k, j); 415 } else { 416 int hi = -1; 417 418 if (isNonzero(k)) { 419 hi = k.intValue(); 420 } else if (thresh.size() > 0) { 421 hi = ((Integer) thresh.lastKey()).intValue(); 422 } 423 424 // off the end? 425 if (hi == -1 || j.compareTo(getLastValue()) > 0) { 426 append(j); 427 k = Integer.valueOf(hi + 1); 428 } else { 429 // binary search for insertion point: 430 int lo = 0; 431 432 while (lo <= hi) { 433 int index = (hi + lo) / 2; 434 Integer val = (Integer) thresh.get(Integer.valueOf(index)); 435 int cmp = j.compareTo(val); 436 437 if (cmp == 0) { 438 return null; 439 } else if (cmp > 0) { 440 lo = index + 1; 441 } else { 442 hi = index - 1; 443 } 444 } 445 446 thresh.put(Integer.valueOf(lo), j); 447 k = Integer.valueOf(lo); 448 } 449 } 450 451 return k; 452 } 453 454}