View Javadoc

1   package org.kit.furia.index;
2   
3   import java.io.File;
4   import java.io.IOException;
5   import java.util.HashMap;
6   import java.util.Iterator;
7   import java.util.List;
8   import java.util.Map;
9   import java.util.PriorityQueue;
10  
11  import org.ajmm.obsearch.Index;
12  import org.ajmm.obsearch.index.IndexShort;
13  import org.ajmm.obsearch.ob.OBShort;
14  import org.ajmm.obsearch.result.OBPriorityQueueShort;
15  import org.ajmm.obsearch.result.OBResultShort;
16  import org.apache.log4j.Logger;
17  import org.kit.furia.Document;
18  import org.kit.furia.ResultCandidate;
19  import org.kit.furia.Document.DocumentElement;
20  import org.kit.furia.exceptions.IRException;
21  
22  import com.sleepycat.je.DatabaseException;
23  
24  /*
25   Furia-chan: An Open Source software license violation detector.    
26   Copyright (C) 2007 Kyushu Institute of Technology
27  
28   This program is free software: you can redistribute it and/or modify
29   it under the terms of the GNU General Public License as published by
30   the Free Software Foundation, either version 3 of the License, or
31   (at your option) any later version.
32  
33   This program is distributed in the hope that it will be useful,
34   but WITHOUT ANY WARRANTY; without even the implied warranty of
35   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
36   GNU General Public License for more details.
37  
38   You should have received a copy of the GNU General Public License
39   along with this program.  If not, see <http://www.gnu.org/licenses/>.
40   */
41  
42  /**
43   * FIRIndexShort uses IR techniques to match OB objects (OBSearch objects) when
44   * the objects extend from OBShort.
45   * @author Arnoldo Jose Muller Molina
46   * @since 0
47   */
48  public class FIRIndexShort < O extends OBShort >
49          extends AbstractIRIndex < O > implements
50          org.kit.furia.IRIndexShort < O > {
51      
52      private static final Logger logger = Logger.getLogger(FIRIndexShort.class.getSimpleName());
53  
54      /**
55       * 
56       */
57      private IndexShort < O > index;
58  
59      /**
60       * Creates a new IR Index that works on shorts
61       * @param dbFolder
62       *                The folder in which Lucene's files will be stored
63       * @throws IOException
64       *                 If the given directory does not exist or if some other IO
65       *                 error occurs
66       */
67      public FIRIndexShort(IndexShort < O > index, File dbFolder)
68              throws IOException {
69          super(dbFolder);
70          this.index = index;
71      }
72      // TODO: re-write this as an iterator to lazily extract the results. 
73      public final List < ResultCandidate > search(Document < O > document, byte k,
74              short r, short n) throws IRException{
75          Iterator < Document < O >.DocumentElement < O >> it = document
76                  .iterator();
77          // we transform now the given document, to a document that is in terms
78          // of the
79          // fragments available in the database.
80          // we store term id -> term freq. This will be used to create the query.
81          Map<Integer, Integer> documentInTermsOfTheDatabase = new HashMap<Integer, Integer>(document.size() * k);
82  
83          while (it.hasNext()) {
84              Document < O >.DocumentElement < O > elem = it.next();
85              O toMatch = elem.getObject();
86              OBPriorityQueueShort < O > result = new OBPriorityQueueShort < O >(
87                      k);
88              try{
89                  // match the object in the database.
90                  index.searchOB(toMatch, r, result);
91  
92                  // for all the returned elements, we add their ids and the initial
93                  // count that came from "document".
94                  Iterator<OBResultShort<O>> itO = result.iterator();
95                  while(itO.hasNext()){
96                      OBResultShort<O> match = itO.next();
97                      Integer exists = documentInTermsOfTheDatabase.get(match.getId());
98                      if(exists == null){
99                          documentInTermsOfTheDatabase.put(match.getId(), elem.getCount());
100                     }else{
101                         documentInTermsOfTheDatabase.put(match.getId(), elem.getCount() + exists);
102                     }
103                 }
104                 
105                
106                 
107             }catch(Exception e){
108                 logger.fatal("Fatal error while searching" , e);
109                 throw new IRException(e);
110             }
111             
112         }
113         return processQueryResults(documentInTermsOfTheDatabase,n, document);
114     }
115     
116     
117     public Index < O > getIndex() {
118         return index;
119     }
120     
121     public int getWordsSize() throws DatabaseException{
122         return this.index.databaseSize();
123     }
124 
125 }