001/* Copyright (C) 2013 TU Dortmund
002 * This file is part of LearnLib, http://www.learnlib.de/.
003 * 
004 * LearnLib is free software; you can redistribute it and/or
005 * modify it under the terms of the GNU Lesser General Public
006 * License version 3.0 as published by the Free Software Foundation.
007 * 
008 * LearnLib is distributed in the hope that it will be useful,
009 * but WITHOUT ANY WARRANTY; without even the implied warranty of
010 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
011 * Lesser General Public License for more details.
012 * 
013 * You should have received a copy of the GNU Lesser General Public
014 * License along with LearnLib; if not, see
015 * <http://www.gnu.de/documents/lgpl.en.html>.
016 */
017package de.learnlib.algorithms.lstargeneric.table;
018
019import gnu.trove.map.TObjectIntMap;
020import gnu.trove.map.hash.TObjectIntHashMap;
021
022import java.util.ArrayList;
023import java.util.Collections;
024import java.util.HashMap;
025import java.util.Iterator;
026import java.util.List;
027import java.util.Map;
028
029import net.automatalib.words.Alphabet;
030import net.automatalib.words.Word;
031import de.learnlib.api.AccessSequenceTransformer;
032import de.learnlib.api.MembershipOracle;
033import de.learnlib.oracles.DefaultQuery;
034
035
036/**
037 * Observation table class.
038 * <p>
039 * An observation table (OT) is the central data structure used by Angluin's L* algorithm,
040 * as described in the paper "Learning Regular Sets from Queries and Counterexamples".
041 * <p>
042 * An observation table is a two-dimensional table, with rows indexed by prefixes,
043 * and columns indexed by suffixes. For a prefix <code>u</code> and a suffix <code>v</code>,
044 * the respective cell contains the result of the membership query <code>(u, v)</code>.
045 * <p>
046 * The set of prefixes (row labels) is divided into two disjoint sets: short and long prefixes.
047 * Each long prefix is a one-letter extension of a short prefix; conversely, every time a
048 * prefix is added to the set of short prefixes, all possible one-letter extensions are added
049 * to the set of long prefixes.
050 * <p>
051 * In order to derive a well-defined hypothesis from an observation table, it must satisfy two
052 * properties: closedness and consistency.
053 * <ul>
054 * <li>An observation table is <b>closed</b> iff for each long prefix <code>u</code> there exists
055 * a short prefix <code>u'</code> such that the row contents for both prefixes are equal.
056 * <li>An observation table is <b>consistent</b> iff for every two short prefixes <code>u</code> and
057 * <code>u'</code> with identical row contents, it holds that for every input symbol <code>a</code>
058 * the rows indexed by <code>ua</code> and <code>u'a</code> also have identical contents.  
059 * </ul>
060 * 
061 * @author Malte Isberner <malte.isberner@gmail.com>
062 *
063 * @param <I> input symbol class
064 * @param <O> output class
065 */
066public final class ObservationTable<I,O> implements AccessSequenceTransformer<I> {
067        
068        // private static final Integer NO_ENTRY = null;
069        private static final int NO_ENTRY = -1;
070        
071        
072        private final Alphabet<I> alphabet;
073        
074        private final List<Row<I>> shortPrefixRows
075                = new ArrayList<Row<I>>();
076        private final List<Row<I>> longPrefixRows
077                = new ArrayList<Row<I>>();
078        
079        private final List<Row<I>> allRows
080                = new ArrayList<Row<I>>();
081        
082        private final List<List<O>> allRowContents
083                = new ArrayList<List<O>>();
084        
085        private final List<Row<I>> canonicalRows
086                = new ArrayList<Row<I>>();
087        
088        /*
089        private final Map<List<O>,Integer> rowContentIds
090                = new HashMap<List<O>,Integer>();
091        */
092        
093        private final TObjectIntMap<List<O>> rowContentIds
094                = new TObjectIntHashMap<>(10, 0.75f, NO_ENTRY);
095        
096        private final Map<Word<I>,Row<I>> rowMap
097                = new HashMap<Word<I>,Row<I>>();
098        
099        private int numRows = 0;
100        
101        private final List<Word<I>> suffixes
102                = new ArrayList<Word<I>>();
103        
104        
105        /**
106         * Constructor.
107         * @param alphabet the learning alphabet.
108         */
109        public ObservationTable(Alphabet<I> alphabet) {
110                this.alphabet = alphabet;
111        }
112        
113        /**
114         * Initializes an observation table using a specified set of suffixes.
115         * 
116         * @param initialSuffixes the set of initial column labels.
117         * @param oracle the {@link MembershipOracle} to use for performing queries
118         * @return a list of equivalence classes of unclosed rows
119         */
120        public List<List<Row<I>>> initialize(List<Word<I>> initialSuffixes, MembershipOracle<I,O> oracle) {
121                if(allRows.size() > 0)
122                        throw new IllegalStateException("Called initialize, but there are already rows present");
123                
124                int numSuffixes = initialSuffixes.size();
125                suffixes.addAll(initialSuffixes);
126                
127                int numLps = alphabet.size();
128                int numPrefixes = 1 + numLps;
129                
130                List<DefaultQuery<I,O>> queries = new ArrayList<DefaultQuery<I,O>>(numPrefixes * numSuffixes);
131                
132                Word<I> eps = Word.epsilon();
133                Row<I> epsRow = createSpRow(Word.<I>epsilon());
134                
135                buildQueries(queries, eps, suffixes);
136                
137                for(int i = 0; i < alphabet.size(); i++) {
138                        I sym = alphabet.getSymbol(i);
139                        Word<I> w = Word.fromLetter(sym);
140                        Row<I> lpRow = createLpRow(w);
141                        buildQueries(queries, w, suffixes);
142                        epsRow.setSuccessor(i, lpRow);
143                }
144                
145                oracle.processQueries(queries);
146                
147                Iterator<DefaultQuery<I,O>> queryIt = queries.iterator();
148                
149                List<O> firstRowContents = new ArrayList<O>(numSuffixes);
150                fetchResults(queryIt, firstRowContents, numSuffixes);
151                processContents(epsRow, firstRowContents, true);
152                
153                List<List<Row<I>>> unclosed = new ArrayList<List<Row<I>>>();
154                
155                for(Row<I> lpRow : longPrefixRows) {
156                        List<O> rowContents = new ArrayList<O>(numSuffixes);
157                        fetchResults(queryIt, rowContents, numSuffixes);
158                        if(processContents(lpRow, rowContents, false))
159                                unclosed.add(new ArrayList<Row<I>>());
160                        
161                        int id = lpRow.getRowContentId();
162                        
163                        if(id > 0)
164                                unclosed.get(id - 1).add(lpRow);
165                }
166                
167                return unclosed;
168        }
169        
170        /**
171         * Adds a suffix to the list of distinguishing suffixes. This is a convenience method
172         * that can be used as shorthand for
173         * <code>addSufixes(Collections.singletonList(suffix), oracle)</code>
174         * @param suffix the suffix to add
175         * @param oracle the membership oracle
176         * @return a list of equivalence classes of unclosed rows
177         */
178        public List<List<Row<I>>> addSuffix(Word<I> suffix, MembershipOracle<I,O> oracle) {
179                return addSuffixes(Collections.singletonList(suffix), oracle);
180        }
181        
182        /**
183         * Adds suffixes to the list of distinguishing suffixes.
184         * @param newSuffixes the suffixes to add
185         * @param oracle the membership oracle
186         * @return a list of equivalence classes of unclosed rows
187         */
188        public List<List<Row<I>>> addSuffixes(List<Word<I>> newSuffixes, MembershipOracle<I, O> oracle) {
189                int oldSuffixCount = suffixes.size();
190                int numNewSuffixes = newSuffixes.size();
191                
192                int numSpRows = shortPrefixRows.size();
193                int rowCount = numSpRows + longPrefixRows.size();
194                
195                List<DefaultQuery<I,O>> queries = new ArrayList<DefaultQuery<I,O>>(rowCount * numNewSuffixes);
196                
197                for(Row<I> row : shortPrefixRows)
198                        buildQueries(queries, row.getPrefix(), newSuffixes);
199                
200                for(Row<I> row : longPrefixRows)
201                        buildQueries(queries, row.getPrefix(), newSuffixes);
202                
203                oracle.processQueries(queries);
204                
205                Iterator<DefaultQuery<I,O>> queryIt = queries.iterator();
206                
207                for(Row<I> row : shortPrefixRows) {
208                        List<O> rowContents = allRowContents.get(row.getRowContentId());
209                        if(rowContents.size() == oldSuffixCount) {
210                                rowContentIds.remove(rowContents);
211                                fetchResults(queryIt, rowContents, numNewSuffixes);
212                                rowContentIds.put(rowContents, row.getRowContentId());
213                        }
214                        else {
215                                List<O> newContents = new ArrayList<O>(oldSuffixCount + numNewSuffixes);
216                                newContents.addAll(rowContents.subList(0, oldSuffixCount));
217                                fetchResults(queryIt, newContents, numNewSuffixes);
218                                processContents(row, newContents, true);
219                        }
220                }
221                
222                List<List<Row<I>>> unclosed = new ArrayList<List<Row<I>>>();
223                numSpRows = numDistinctRows();
224                
225                for(Row<I> row : longPrefixRows) {
226                        List<O> rowContents = allRowContents.get(row.getRowContentId());
227                        if(rowContents.size() == oldSuffixCount) {
228                                rowContentIds.remove(rowContents);
229                                fetchResults(queryIt, rowContents, numNewSuffixes);
230                                rowContentIds.put(rowContents, row.getRowContentId());
231                        }
232                        else {
233                                List<O> newContents = new ArrayList<O>(oldSuffixCount + numNewSuffixes);
234                                newContents.addAll(rowContents.subList(0, oldSuffixCount));
235                                fetchResults(queryIt, newContents, numNewSuffixes);
236                                if(processContents(row, newContents, false))
237                                        unclosed.add(new ArrayList<Row<I>>());
238                                
239                                int id = row.getRowContentId();
240                                if(id >= numSpRows)
241                                        unclosed.get(id - numSpRows).add(row);
242                        }       
243                }
244                
245                this.suffixes.addAll(newSuffixes);
246                
247                return unclosed;
248        }
249        
250        
251        /**
252         * Moves the specified rows to the set of short prefix rows. If some of the specified
253         * rows already are short prefix rows, they are ignored (unless they do not have any
254         * contents, in which case they are completed).
255         * @param lpRows the rows to move to the set of short prefix rows
256         * @param oracle the membership oracle
257         * @return a list of equivalence classes of unclosed rows
258         */
259        public List<List<Row<I>>> toShortPrefixes(List<Row<I>> lpRows, MembershipOracle<I,O> oracle) {
260                List<Row<I>> freshSpRows = new ArrayList<Row<I>>();
261                List<Row<I>> freshLpRows = new ArrayList<Row<I>>();
262                
263                for(Row<I> row : lpRows) {
264                        if(row.isShortPrefix()) {
265                                if(row.hasContents())
266                                        continue;
267                                freshSpRows.add(row);
268                        }
269                        else {
270                                makeShort(row);
271                                if(!row.hasContents())
272                                        freshSpRows.add(row);
273                        }
274                        
275                        Word<I> prefix = row.getPrefix();
276                                                
277                        for(int i = 0; i < alphabet.size(); i++) {
278                                I sym = alphabet.getSymbol(i);
279                                Word<I> lp = prefix.append(sym);
280                                Row<I> lpRow = rowMap.get(lp);
281                                if(lpRow == null) {
282                                        lpRow = createLpRow(lp);
283                                        freshLpRows.add(lpRow);
284                                }
285                                row.setSuccessor(i, lpRow);
286                        }
287                }
288                
289                int numSuffixes = suffixes.size();
290                
291                int numFreshRows = freshSpRows.size() + freshLpRows.size();
292                List<DefaultQuery<I,O>> queries = new ArrayList<DefaultQuery<I,O>>(numFreshRows * numSuffixes);
293                buildRowQueries(queries, freshSpRows, suffixes);
294                buildRowQueries(queries, freshLpRows, suffixes);
295                
296                oracle.processQueries(queries);
297                Iterator<DefaultQuery<I,O>> queryIt = queries.iterator();
298                
299                for(Row<I> row : freshSpRows) {
300                        List<O> contents = new ArrayList<O>(numSuffixes);
301                        fetchResults(queryIt, contents, numSuffixes);
302                        processContents(row, contents, true);
303                }
304                
305                int numSpRows = numDistinctRows();
306                List<List<Row<I>>> unclosed = new ArrayList<List<Row<I>>>();
307                
308                for(Row<I> row : freshLpRows) {
309                        List<O> contents = new ArrayList<O>(numSuffixes);
310                        fetchResults(queryIt, contents, numSuffixes);
311                        if(processContents(row, contents, false))
312                                unclosed.add(new ArrayList<Row<I>>());
313                        
314                        int id = row.getRowContentId();
315                        if(id >= numSpRows)
316                                unclosed.get(id - numSpRows).add(row);
317                }
318                
319                return unclosed;
320        }
321        
322        public Row<I> getRowSuccessor(Row<I> row, I sym) {
323                return row.getSuccessor(alphabet.getSymbolIndex(sym));
324        }
325        
326        public List<List<Row<I>>> addShortPrefixes(List<Word<I>> shortPrefixes, MembershipOracle<I,O> oracle) {     
327                List<Row<I>> toSpRows = new ArrayList<Row<I>>();
328                
329                for(Word<I> sp : shortPrefixes) {
330                        Row<I> row = rowMap.get(sp);
331                        if(row != null) {
332                                if(row.isShortPrefix())
333                                        continue;
334                        }
335                        else
336                                row = createSpRow(sp);
337                        toSpRows.add(row);
338                }
339                
340                return toShortPrefixes(toSpRows, oracle);
341        }
342        
343        
344        @SuppressWarnings("unchecked")
345        public Inconsistency<I,O> findInconsistency() {
346                Row<I>[] canonicRows = (Row<I>[])new Row<?>[numDistinctRows()];
347                
348                for(Row<I> spRow : shortPrefixRows) {
349                        int contentId = spRow.getRowContentId();
350                        
351                        Row<I> canRow = canonicRows[contentId];
352                        if(canRow == null) {
353                                canonicRows[contentId] = spRow;
354                                continue;
355                        }
356                        
357                        for(int i = 0; i < alphabet.size(); i++) {
358                                int spSuccContent = spRow.getSuccessor(i).getRowContentId();
359                                int canSuccContent = canRow.getSuccessor(i).getRowContentId();
360                                if(spSuccContent != canSuccContent)
361                                        return new Inconsistency<I,O>(canRow, spRow, i);
362                        }
363                }
364                
365                return null;
366        }
367        
368        
369        protected boolean makeShort(Row<I> row) {
370                if(row.isShortPrefix())
371                        return false;
372                
373                int lastIdx = longPrefixRows.size() - 1;
374                Row<I> last = longPrefixRows.get(lastIdx);
375                int rowIdx = row.getLpIndex();
376                longPrefixRows.remove(lastIdx);
377                if(last != row) {
378                        longPrefixRows.set(rowIdx, last);
379                        last.setLpIndex(rowIdx);
380                }
381                
382                shortPrefixRows.add(row);
383                row.makeShort(alphabet.size());
384                
385                if(row.hasContents()) {
386                        int cid = row.getRowContentId();
387                        if(canonicalRows.get(cid) == null)
388                                canonicalRows.set(cid, row);
389                }
390                return true;
391        }
392        
393        protected Row<I> createLpRow(Word<I> prefix) {
394                Row<I> newRow = new Row<I>(prefix, numRows++);
395                allRows.add(newRow);
396                rowMap.put(prefix, newRow);
397                int idx = longPrefixRows.size();
398                longPrefixRows.add(newRow);
399                newRow.setLpIndex(idx);
400                return newRow;
401        }
402        
403        protected Row<I> createSpRow(Word<I> prefix) {
404                Row<I> newRow = new Row<I>(prefix, numRows++, alphabet.size());
405                allRows.add(newRow);
406                rowMap.put(prefix, newRow);
407                shortPrefixRows.add(newRow);
408                return newRow;
409        }
410        
411        public List<O> rowContents(Row<I> row) {
412                return allRowContents.get(row.getRowContentId());
413        }
414        
415        public O cellContents(Row<I> row, int columnId) {
416                List<O> contents = rowContents(row);
417                return contents.get(columnId);
418        }
419        
420        public Row<I> getRow(int rowId) {
421                return allRows.get(rowId);
422        }
423        
424        public int numDistinctRows() {
425                return allRowContents.size();
426        }
427        
428        public List<Row<I>> getShortPrefixRows() {
429                return shortPrefixRows;
430        }
431
432        public int numShortPrefixRows() {
433                return shortPrefixRows.size();
434        }
435        
436        public int numLongPrefixRows() {
437                return longPrefixRows.size();
438        }
439        
440        public int numTotalRows() {
441                return shortPrefixRows.size() + longPrefixRows.size();
442        }
443        
444        public int numSuffixes() {
445                return suffixes.size();
446        }
447        
448        public List<Word<I>> getSuffixes() {
449                return suffixes;
450        }
451        
452        protected boolean processContents(Row<I> row, List<O> rowContents, boolean makeCanonical) {
453                // Integer contentId;
454                int contentId;
455                boolean added = false;
456                if((contentId = rowContentIds.get(rowContents)) == NO_ENTRY) {
457                        rowContentIds.put(rowContents, contentId = numDistinctRows());
458                        allRowContents.add(rowContents);
459                        added = true;
460                        if(makeCanonical)
461                                canonicalRows.add(row);
462                        else
463                                canonicalRows.add(null);
464                }
465                row.setRowContentId(contentId);
466                return added;
467        }
468        
469        protected static <I,O>
470        void buildQueries(List<DefaultQuery<I,O>> queryList, List<Word<I>> prefixes, List<Word<I>> suffixes) {
471                for(Word<I> prefix : prefixes)
472                        buildQueries(queryList, prefix, suffixes);
473        }
474        
475        protected static <I,O>
476        void buildRowQueries(List<DefaultQuery<I,O>> queryList, List<Row<I>> rows, List<Word<I>> suffixes) {
477                for(Row<I> row : rows)
478                        buildQueries(queryList, row.getPrefix(), suffixes);
479        }
480        
481        protected static <I,O>
482        void buildQueries(List<DefaultQuery<I,O>> queryList, Word<I> prefix, List<Word<I>> suffixes) {
483                for(Word<I> suffix : suffixes)
484                        queryList.add(new DefaultQuery<I,O>(prefix, suffix));
485        }
486        
487        /**
488         * Fetches the given number of query responses and adds them to the specified output list.
489         * Also, the query iterator is advanced accordingly.
490         * @param queryIt the query iterator
491         * @param output the output list to write to
492         * @param numSuffixes the number of suffixes (queries)
493         */
494        protected static <I,O>
495        void fetchResults(Iterator<DefaultQuery<I,O>> queryIt, List<O> output, int numSuffixes) {
496                for(int j = 0; j < numSuffixes; j++) {
497                        DefaultQuery<I,O> qry = queryIt.next();
498                        output.add(qry.getOutput());
499                }
500        }
501
502        /**
503         * Checks whether this observation table has been initialized yet (i.e., contains any rows).
504         * @return <tt>true</tt> iff the table has been initialized
505         */
506        public boolean isInitialized() {
507                return (allRows.size() > 0);
508        }
509
510        /**
511         * Retrieves the input alphabet used in this observation table.
512         * @return the input alphabet
513         */
514        public Alphabet<I> getInputAlphabet() {
515                return alphabet;
516        }
517
518        /*
519         * (non-Javadoc)
520         * @see de.learnlib.api.AccessSequenceTransformer#transformAccessSequence(net.automatalib.words.Word)
521         */
522        @Override
523        public Word<I> transformAccessSequence(Word<I> word) {
524                Row<I> current = shortPrefixRows.get(0);
525                
526                for(I sym : word) {
527                        current = getRowSuccessor(current, sym);
528                        if(!current.isShortPrefix())
529                                current = canonicalRows.get(current.getRowContentId());
530                }
531                
532                return current.getPrefix();
533        }
534
535        /*
536         * (non-Javadoc)
537         * @see de.learnlib.api.AccessSequenceTransformer#isAccessSequence(net.automatalib.words.Word)
538         */
539        @Override
540        public boolean isAccessSequence(Word<I> word) {
541                Row<I> current = shortPrefixRows.get(0);
542                
543                for(I sym : word) {
544                        current = getRowSuccessor(current, sym);
545                        if(!current.isShortPrefix())
546                                return false;
547                }
548                
549                return true;
550        }
551}