001/* Copyright (C) 2013 TU Dortmund 002 * This file is part of LearnLib, http://www.learnlib.de/. 003 * 004 * LearnLib is free software; you can redistribute it and/or 005 * modify it under the terms of the GNU Lesser General Public 006 * License version 3.0 as published by the Free Software Foundation. 007 * 008 * LearnLib is distributed in the hope that it will be useful, 009 * but WITHOUT ANY WARRANTY; without even the implied warranty of 010 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 011 * Lesser General Public License for more details. 012 * 013 * You should have received a copy of the GNU Lesser General Public 014 * License along with LearnLib; if not, see 015 * <http://www.gnu.de/documents/lgpl.en.html>. 016 */ 017package de.learnlib.algorithms.lstargeneric.table; 018 019import gnu.trove.map.TObjectIntMap; 020import gnu.trove.map.hash.TObjectIntHashMap; 021 022import java.util.ArrayList; 023import java.util.Collections; 024import java.util.HashMap; 025import java.util.Iterator; 026import java.util.List; 027import java.util.Map; 028 029import net.automatalib.words.Alphabet; 030import net.automatalib.words.Word; 031import de.learnlib.api.AccessSequenceTransformer; 032import de.learnlib.api.MembershipOracle; 033import de.learnlib.oracles.DefaultQuery; 034 035 036/** 037 * Observation table class. 038 * <p> 039 * An observation table (OT) is the central data structure used by Angluin's L* algorithm, 040 * as described in the paper "Learning Regular Sets from Queries and Counterexamples". 041 * <p> 042 * An observation table is a two-dimensional table, with rows indexed by prefixes, 043 * and columns indexed by suffixes. For a prefix <code>u</code> and a suffix <code>v</code>, 044 * the respective cell contains the result of the membership query <code>(u, v)</code>. 045 * <p> 046 * The set of prefixes (row labels) is divided into two disjoint sets: short and long prefixes. 047 * Each long prefix is a one-letter extension of a short prefix; conversely, every time a 048 * prefix is added to the set of short prefixes, all possible one-letter extensions are added 049 * to the set of long prefixes. 050 * <p> 051 * In order to derive a well-defined hypothesis from an observation table, it must satisfy two 052 * properties: closedness and consistency. 053 * <ul> 054 * <li>An observation table is <b>closed</b> iff for each long prefix <code>u</code> there exists 055 * a short prefix <code>u'</code> such that the row contents for both prefixes are equal. 056 * <li>An observation table is <b>consistent</b> iff for every two short prefixes <code>u</code> and 057 * <code>u'</code> with identical row contents, it holds that for every input symbol <code>a</code> 058 * the rows indexed by <code>ua</code> and <code>u'a</code> also have identical contents. 059 * </ul> 060 * 061 * @author Malte Isberner <malte.isberner@gmail.com> 062 * 063 * @param <I> input symbol class 064 * @param <O> output class 065 */ 066public final class ObservationTable<I,O> implements AccessSequenceTransformer<I> { 067 068 // private static final Integer NO_ENTRY = null; 069 private static final int NO_ENTRY = -1; 070 071 072 private final Alphabet<I> alphabet; 073 074 private final List<Row<I>> shortPrefixRows 075 = new ArrayList<Row<I>>(); 076 private final List<Row<I>> longPrefixRows 077 = new ArrayList<Row<I>>(); 078 079 private final List<Row<I>> allRows 080 = new ArrayList<Row<I>>(); 081 082 private final List<List<O>> allRowContents 083 = new ArrayList<List<O>>(); 084 085 private final List<Row<I>> canonicalRows 086 = new ArrayList<Row<I>>(); 087 088 /* 089 private final Map<List<O>,Integer> rowContentIds 090 = new HashMap<List<O>,Integer>(); 091 */ 092 093 private final TObjectIntMap<List<O>> rowContentIds 094 = new TObjectIntHashMap<>(10, 0.75f, NO_ENTRY); 095 096 private final Map<Word<I>,Row<I>> rowMap 097 = new HashMap<Word<I>,Row<I>>(); 098 099 private int numRows = 0; 100 101 private final List<Word<I>> suffixes 102 = new ArrayList<Word<I>>(); 103 104 105 /** 106 * Constructor. 107 * @param alphabet the learning alphabet. 108 */ 109 public ObservationTable(Alphabet<I> alphabet) { 110 this.alphabet = alphabet; 111 } 112 113 /** 114 * Initializes an observation table using a specified set of suffixes. 115 * 116 * @param initialSuffixes the set of initial column labels. 117 * @param oracle the {@link MembershipOracle} to use for performing queries 118 * @return a list of equivalence classes of unclosed rows 119 */ 120 public List<List<Row<I>>> initialize(List<Word<I>> initialSuffixes, MembershipOracle<I,O> oracle) { 121 if(allRows.size() > 0) 122 throw new IllegalStateException("Called initialize, but there are already rows present"); 123 124 int numSuffixes = initialSuffixes.size(); 125 suffixes.addAll(initialSuffixes); 126 127 int numLps = alphabet.size(); 128 int numPrefixes = 1 + numLps; 129 130 List<DefaultQuery<I,O>> queries = new ArrayList<DefaultQuery<I,O>>(numPrefixes * numSuffixes); 131 132 Word<I> eps = Word.epsilon(); 133 Row<I> epsRow = createSpRow(Word.<I>epsilon()); 134 135 buildQueries(queries, eps, suffixes); 136 137 for(int i = 0; i < alphabet.size(); i++) { 138 I sym = alphabet.getSymbol(i); 139 Word<I> w = Word.fromLetter(sym); 140 Row<I> lpRow = createLpRow(w); 141 buildQueries(queries, w, suffixes); 142 epsRow.setSuccessor(i, lpRow); 143 } 144 145 oracle.processQueries(queries); 146 147 Iterator<DefaultQuery<I,O>> queryIt = queries.iterator(); 148 149 List<O> firstRowContents = new ArrayList<O>(numSuffixes); 150 fetchResults(queryIt, firstRowContents, numSuffixes); 151 processContents(epsRow, firstRowContents, true); 152 153 List<List<Row<I>>> unclosed = new ArrayList<List<Row<I>>>(); 154 155 for(Row<I> lpRow : longPrefixRows) { 156 List<O> rowContents = new ArrayList<O>(numSuffixes); 157 fetchResults(queryIt, rowContents, numSuffixes); 158 if(processContents(lpRow, rowContents, false)) 159 unclosed.add(new ArrayList<Row<I>>()); 160 161 int id = lpRow.getRowContentId(); 162 163 if(id > 0) 164 unclosed.get(id - 1).add(lpRow); 165 } 166 167 return unclosed; 168 } 169 170 /** 171 * Adds a suffix to the list of distinguishing suffixes. This is a convenience method 172 * that can be used as shorthand for 173 * <code>addSufixes(Collections.singletonList(suffix), oracle)</code> 174 * @param suffix the suffix to add 175 * @param oracle the membership oracle 176 * @return a list of equivalence classes of unclosed rows 177 */ 178 public List<List<Row<I>>> addSuffix(Word<I> suffix, MembershipOracle<I,O> oracle) { 179 return addSuffixes(Collections.singletonList(suffix), oracle); 180 } 181 182 /** 183 * Adds suffixes to the list of distinguishing suffixes. 184 * @param newSuffixes the suffixes to add 185 * @param oracle the membership oracle 186 * @return a list of equivalence classes of unclosed rows 187 */ 188 public List<List<Row<I>>> addSuffixes(List<Word<I>> newSuffixes, MembershipOracle<I, O> oracle) { 189 int oldSuffixCount = suffixes.size(); 190 int numNewSuffixes = newSuffixes.size(); 191 192 int numSpRows = shortPrefixRows.size(); 193 int rowCount = numSpRows + longPrefixRows.size(); 194 195 List<DefaultQuery<I,O>> queries = new ArrayList<DefaultQuery<I,O>>(rowCount * numNewSuffixes); 196 197 for(Row<I> row : shortPrefixRows) 198 buildQueries(queries, row.getPrefix(), newSuffixes); 199 200 for(Row<I> row : longPrefixRows) 201 buildQueries(queries, row.getPrefix(), newSuffixes); 202 203 oracle.processQueries(queries); 204 205 Iterator<DefaultQuery<I,O>> queryIt = queries.iterator(); 206 207 for(Row<I> row : shortPrefixRows) { 208 List<O> rowContents = allRowContents.get(row.getRowContentId()); 209 if(rowContents.size() == oldSuffixCount) { 210 rowContentIds.remove(rowContents); 211 fetchResults(queryIt, rowContents, numNewSuffixes); 212 rowContentIds.put(rowContents, row.getRowContentId()); 213 } 214 else { 215 List<O> newContents = new ArrayList<O>(oldSuffixCount + numNewSuffixes); 216 newContents.addAll(rowContents.subList(0, oldSuffixCount)); 217 fetchResults(queryIt, newContents, numNewSuffixes); 218 processContents(row, newContents, true); 219 } 220 } 221 222 List<List<Row<I>>> unclosed = new ArrayList<List<Row<I>>>(); 223 numSpRows = numDistinctRows(); 224 225 for(Row<I> row : longPrefixRows) { 226 List<O> rowContents = allRowContents.get(row.getRowContentId()); 227 if(rowContents.size() == oldSuffixCount) { 228 rowContentIds.remove(rowContents); 229 fetchResults(queryIt, rowContents, numNewSuffixes); 230 rowContentIds.put(rowContents, row.getRowContentId()); 231 } 232 else { 233 List<O> newContents = new ArrayList<O>(oldSuffixCount + numNewSuffixes); 234 newContents.addAll(rowContents.subList(0, oldSuffixCount)); 235 fetchResults(queryIt, newContents, numNewSuffixes); 236 if(processContents(row, newContents, false)) 237 unclosed.add(new ArrayList<Row<I>>()); 238 239 int id = row.getRowContentId(); 240 if(id >= numSpRows) 241 unclosed.get(id - numSpRows).add(row); 242 } 243 } 244 245 this.suffixes.addAll(newSuffixes); 246 247 return unclosed; 248 } 249 250 251 /** 252 * Moves the specified rows to the set of short prefix rows. If some of the specified 253 * rows already are short prefix rows, they are ignored (unless they do not have any 254 * contents, in which case they are completed). 255 * @param lpRows the rows to move to the set of short prefix rows 256 * @param oracle the membership oracle 257 * @return a list of equivalence classes of unclosed rows 258 */ 259 public List<List<Row<I>>> toShortPrefixes(List<Row<I>> lpRows, MembershipOracle<I,O> oracle) { 260 List<Row<I>> freshSpRows = new ArrayList<Row<I>>(); 261 List<Row<I>> freshLpRows = new ArrayList<Row<I>>(); 262 263 for(Row<I> row : lpRows) { 264 if(row.isShortPrefix()) { 265 if(row.hasContents()) 266 continue; 267 freshSpRows.add(row); 268 } 269 else { 270 makeShort(row); 271 if(!row.hasContents()) 272 freshSpRows.add(row); 273 } 274 275 Word<I> prefix = row.getPrefix(); 276 277 for(int i = 0; i < alphabet.size(); i++) { 278 I sym = alphabet.getSymbol(i); 279 Word<I> lp = prefix.append(sym); 280 Row<I> lpRow = rowMap.get(lp); 281 if(lpRow == null) { 282 lpRow = createLpRow(lp); 283 freshLpRows.add(lpRow); 284 } 285 row.setSuccessor(i, lpRow); 286 } 287 } 288 289 int numSuffixes = suffixes.size(); 290 291 int numFreshRows = freshSpRows.size() + freshLpRows.size(); 292 List<DefaultQuery<I,O>> queries = new ArrayList<DefaultQuery<I,O>>(numFreshRows * numSuffixes); 293 buildRowQueries(queries, freshSpRows, suffixes); 294 buildRowQueries(queries, freshLpRows, suffixes); 295 296 oracle.processQueries(queries); 297 Iterator<DefaultQuery<I,O>> queryIt = queries.iterator(); 298 299 for(Row<I> row : freshSpRows) { 300 List<O> contents = new ArrayList<O>(numSuffixes); 301 fetchResults(queryIt, contents, numSuffixes); 302 processContents(row, contents, true); 303 } 304 305 int numSpRows = numDistinctRows(); 306 List<List<Row<I>>> unclosed = new ArrayList<List<Row<I>>>(); 307 308 for(Row<I> row : freshLpRows) { 309 List<O> contents = new ArrayList<O>(numSuffixes); 310 fetchResults(queryIt, contents, numSuffixes); 311 if(processContents(row, contents, false)) 312 unclosed.add(new ArrayList<Row<I>>()); 313 314 int id = row.getRowContentId(); 315 if(id >= numSpRows) 316 unclosed.get(id - numSpRows).add(row); 317 } 318 319 return unclosed; 320 } 321 322 public Row<I> getRowSuccessor(Row<I> row, I sym) { 323 return row.getSuccessor(alphabet.getSymbolIndex(sym)); 324 } 325 326 public List<List<Row<I>>> addShortPrefixes(List<Word<I>> shortPrefixes, MembershipOracle<I,O> oracle) { 327 List<Row<I>> toSpRows = new ArrayList<Row<I>>(); 328 329 for(Word<I> sp : shortPrefixes) { 330 Row<I> row = rowMap.get(sp); 331 if(row != null) { 332 if(row.isShortPrefix()) 333 continue; 334 } 335 else 336 row = createSpRow(sp); 337 toSpRows.add(row); 338 } 339 340 return toShortPrefixes(toSpRows, oracle); 341 } 342 343 344 @SuppressWarnings("unchecked") 345 public Inconsistency<I,O> findInconsistency() { 346 Row<I>[] canonicRows = (Row<I>[])new Row<?>[numDistinctRows()]; 347 348 for(Row<I> spRow : shortPrefixRows) { 349 int contentId = spRow.getRowContentId(); 350 351 Row<I> canRow = canonicRows[contentId]; 352 if(canRow == null) { 353 canonicRows[contentId] = spRow; 354 continue; 355 } 356 357 for(int i = 0; i < alphabet.size(); i++) { 358 int spSuccContent = spRow.getSuccessor(i).getRowContentId(); 359 int canSuccContent = canRow.getSuccessor(i).getRowContentId(); 360 if(spSuccContent != canSuccContent) 361 return new Inconsistency<I,O>(canRow, spRow, i); 362 } 363 } 364 365 return null; 366 } 367 368 369 protected boolean makeShort(Row<I> row) { 370 if(row.isShortPrefix()) 371 return false; 372 373 int lastIdx = longPrefixRows.size() - 1; 374 Row<I> last = longPrefixRows.get(lastIdx); 375 int rowIdx = row.getLpIndex(); 376 longPrefixRows.remove(lastIdx); 377 if(last != row) { 378 longPrefixRows.set(rowIdx, last); 379 last.setLpIndex(rowIdx); 380 } 381 382 shortPrefixRows.add(row); 383 row.makeShort(alphabet.size()); 384 385 if(row.hasContents()) { 386 int cid = row.getRowContentId(); 387 if(canonicalRows.get(cid) == null) 388 canonicalRows.set(cid, row); 389 } 390 return true; 391 } 392 393 protected Row<I> createLpRow(Word<I> prefix) { 394 Row<I> newRow = new Row<I>(prefix, numRows++); 395 allRows.add(newRow); 396 rowMap.put(prefix, newRow); 397 int idx = longPrefixRows.size(); 398 longPrefixRows.add(newRow); 399 newRow.setLpIndex(idx); 400 return newRow; 401 } 402 403 protected Row<I> createSpRow(Word<I> prefix) { 404 Row<I> newRow = new Row<I>(prefix, numRows++, alphabet.size()); 405 allRows.add(newRow); 406 rowMap.put(prefix, newRow); 407 shortPrefixRows.add(newRow); 408 return newRow; 409 } 410 411 public List<O> rowContents(Row<I> row) { 412 return allRowContents.get(row.getRowContentId()); 413 } 414 415 public O cellContents(Row<I> row, int columnId) { 416 List<O> contents = rowContents(row); 417 return contents.get(columnId); 418 } 419 420 public Row<I> getRow(int rowId) { 421 return allRows.get(rowId); 422 } 423 424 public int numDistinctRows() { 425 return allRowContents.size(); 426 } 427 428 public List<Row<I>> getShortPrefixRows() { 429 return shortPrefixRows; 430 } 431 432 public int numShortPrefixRows() { 433 return shortPrefixRows.size(); 434 } 435 436 public int numLongPrefixRows() { 437 return longPrefixRows.size(); 438 } 439 440 public int numTotalRows() { 441 return shortPrefixRows.size() + longPrefixRows.size(); 442 } 443 444 public int numSuffixes() { 445 return suffixes.size(); 446 } 447 448 public List<Word<I>> getSuffixes() { 449 return suffixes; 450 } 451 452 protected boolean processContents(Row<I> row, List<O> rowContents, boolean makeCanonical) { 453 // Integer contentId; 454 int contentId; 455 boolean added = false; 456 if((contentId = rowContentIds.get(rowContents)) == NO_ENTRY) { 457 rowContentIds.put(rowContents, contentId = numDistinctRows()); 458 allRowContents.add(rowContents); 459 added = true; 460 if(makeCanonical) 461 canonicalRows.add(row); 462 else 463 canonicalRows.add(null); 464 } 465 row.setRowContentId(contentId); 466 return added; 467 } 468 469 protected static <I,O> 470 void buildQueries(List<DefaultQuery<I,O>> queryList, List<Word<I>> prefixes, List<Word<I>> suffixes) { 471 for(Word<I> prefix : prefixes) 472 buildQueries(queryList, prefix, suffixes); 473 } 474 475 protected static <I,O> 476 void buildRowQueries(List<DefaultQuery<I,O>> queryList, List<Row<I>> rows, List<Word<I>> suffixes) { 477 for(Row<I> row : rows) 478 buildQueries(queryList, row.getPrefix(), suffixes); 479 } 480 481 protected static <I,O> 482 void buildQueries(List<DefaultQuery<I,O>> queryList, Word<I> prefix, List<Word<I>> suffixes) { 483 for(Word<I> suffix : suffixes) 484 queryList.add(new DefaultQuery<I,O>(prefix, suffix)); 485 } 486 487 /** 488 * Fetches the given number of query responses and adds them to the specified output list. 489 * Also, the query iterator is advanced accordingly. 490 * @param queryIt the query iterator 491 * @param output the output list to write to 492 * @param numSuffixes the number of suffixes (queries) 493 */ 494 protected static <I,O> 495 void fetchResults(Iterator<DefaultQuery<I,O>> queryIt, List<O> output, int numSuffixes) { 496 for(int j = 0; j < numSuffixes; j++) { 497 DefaultQuery<I,O> qry = queryIt.next(); 498 output.add(qry.getOutput()); 499 } 500 } 501 502 /** 503 * Checks whether this observation table has been initialized yet (i.e., contains any rows). 504 * @return <tt>true</tt> iff the table has been initialized 505 */ 506 public boolean isInitialized() { 507 return (allRows.size() > 0); 508 } 509 510 /** 511 * Retrieves the input alphabet used in this observation table. 512 * @return the input alphabet 513 */ 514 public Alphabet<I> getInputAlphabet() { 515 return alphabet; 516 } 517 518 /* 519 * (non-Javadoc) 520 * @see de.learnlib.api.AccessSequenceTransformer#transformAccessSequence(net.automatalib.words.Word) 521 */ 522 @Override 523 public Word<I> transformAccessSequence(Word<I> word) { 524 Row<I> current = shortPrefixRows.get(0); 525 526 for(I sym : word) { 527 current = getRowSuccessor(current, sym); 528 if(!current.isShortPrefix()) 529 current = canonicalRows.get(current.getRowContentId()); 530 } 531 532 return current.getPrefix(); 533 } 534 535 /* 536 * (non-Javadoc) 537 * @see de.learnlib.api.AccessSequenceTransformer#isAccessSequence(net.automatalib.words.Word) 538 */ 539 @Override 540 public boolean isAccessSequence(Word<I> word) { 541 Row<I> current = shortPrefixRows.get(0); 542 543 for(I sym : word) { 544 current = getRowSuccessor(current, sym); 545 if(!current.isShortPrefix()) 546 return false; 547 } 548 549 return true; 550 } 551}