View Javadoc

1   package org.kit.furia;
2   
3   import java.io.BufferedReader;
4   import java.io.File;
5   import java.io.FileReader;
6   import java.io.FilenameFilter;
7   import java.io.IOException;
8   import java.util.Arrays;
9   import java.util.Iterator;
10  import java.util.List;
11  
12  import org.ajmm.obsearch.asserts.OBAsserts;
13  import org.ajmm.obsearch.example.HelpException;
14  import org.ajmm.obsearch.exception.AlreadyFrozenException;
15  import org.ajmm.obsearch.exception.IllegalIdException;
16  import org.ajmm.obsearch.exception.OBException;
17  import org.ajmm.obsearch.exception.OutOfRangeException;
18  import org.ajmm.obsearch.index.IndexFactory;
19  import org.ajmm.obsearch.index.IndexShort;
20  import org.ajmm.obsearch.index.PPTreeShort;
21  import org.ajmm.obsearch.index.UnsafeNCorePPTreeShort;
22  import org.ajmm.obsearch.index.UnsafePPTreeShort;
23  import org.ajmm.obsearch.index.pivotselection.AcceptAll;
24  import org.ajmm.obsearch.index.pivotselection.KMeansPPPivotSelector;
25  import org.ajmm.obsearch.ob.OBShort;
26  import org.apache.commons.cli.CommandLine;
27  import org.apache.commons.cli.Option;
28  import org.apache.commons.cli.OptionBuilder;
29  import org.apache.commons.cli.Options;
30  import org.apache.commons.cli.ParseException;
31  import org.apache.log4j.LogManager;
32  import org.apache.log4j.Logger;
33  import org.kit.furia.exceptions.IRException;
34  import org.kit.furia.fragment.OBFragment;
35  import org.kit.furia.index.FIRIndexShort;
36  import org.kit.furia.io.FuriaInputOBFragment;
37  
38  import com.sleepycat.je.DatabaseException;
39  
40  /*
41   Furia-chan: An Open Source software license violation detector.    
42   Copyright (C) 2008 Kyushu Institute of Technology
43  
44   This program is free software: you can redistribute it and/or modify
45   it under the terms of the GNU General Public License as published by
46   the Free Software Foundation, either version 3 of the License, or
47   (at your option) any later version.
48  
49   This program is distributed in the hope that it will be useful,
50   but WITHOUT ANY WARRANTY; without even the implied warranty of
51   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
52   GNU General Public License for more details.
53  
54   You should have received a copy of the GNU General Public License
55   along with this program.  If not, see <http://www.gnu.org/licenses/>.
56   */
57  
58  /**
59   * FuriaChan This class wraps IRIndex and OBSearch to perform matches on binary
60   * programs. This class contains the ideas presented in the paper:
61   * 
62   * <pre>
63   *  Fast Approximate Matching of Programs for Protecting
64   *  Libre/Open Source Software by Using Spatial Indexes 
65   *  Arnoldo Jose Muller Molina and Shinohara, Takeshi 
66   *  Kyushu Institute of Technology, Japan.
67   *  In: Source Code Analysis and Manipulation, 2007. SCAM 2007.
68   * </pre>
69   * 
70   * The only difference is that instead of a spatial index, we use an asymmetric
71   * P+Tree (OBSearch). This class operates on folders of folders that contain
72   * fragment files, or with fragment files directly. In any case, it can load
73   * applications or search fragment multi-sets The class can be invoked from the
74   * command line. Each method will explains the command line parameters required
75   * to invoke the method. In general, it is faster to operate on folders of
76   * folders of fragment files. This is because the databases have to be loaded
77   * each time the program is started. Additionally, OBsearch takes advantage of
78   * frequently accessed objects that are kept in a cache. Since we are dealing
79   * with trees, it makes a huge difference to use this cache. This program has
80   * two modes, insert mode and search mode. - In insert mode, one or more
81   * fragmented applications are added to the database. - In search mode, queries
82   * of fragments searched in the database, and the corresponding binary program
83   * similarity results are returned. Before search mode can be used, a "freeze"
84   * operation must be performed so that OBSearch can efficiently search trees. It
85   * is recommended to freeze the database after many fragmented applications have
86   * been inserted. It will take some time, but it is a one time operation. Insert
87   * mode and search mode work in two modes: Single application mode: One program
88   * is inserted/searched. Directory of applications mode: A directory that
89   * contains directories with fragmented applications is inserted/searched. The
90   * second is the recommended mode. This is because in single application mode,
91   * all the database has to be loaded several times.
92   * @author Arnoldo Jose Muller Molina
93   */
94  
95  public class FuriaChan
96          extends AbstractFuriaChanCommandLine {
97  
98      private static final Logger logger = Logger.getLogger("FuriaChan");
99  
100     public static void main(String[] args) throws Exception {
101         int returnValue = 0;
102         FuriaChanEngine engine = null;
103         try {
104 
105             initLogger();
106             final CommandLine cline = getCommandLine(initCommandLine(),
107                     FuriaChan.class, args);
108 
109             File db = new File(cline.getOptionValue("db"));
110 
111             logger.info("All your source are belong to us!");
112 
113             engine = new FuriaChanEngine(db);
114             if(cline.hasOption("freeze")){
115                 OBAsserts.chkFileExists(db);
116                 //throw new Exception("Cannot freeze now at this point. The first insert will freeze so make sure it has a bunch of apps");
117                 engine.freeze();
118             }else{ 
119                 File input = new File(cline.getOptionValue("input"));
120                 OBAsserts.chkFileExists(db);
121                 OBAsserts.chkFileExists(input);
122                 if(cline.hasOption("load")){ // load data into the DB
123                     engine.insert(input);
124                     //engine.freeze();
125                 }else if(cline.hasOption("search")){ // search for
126                     // setting defaults
127                     engine.setK((byte)1);
128                     engine.setR((short)1);
129                     engine.setN((short)10);
130                     engine.setMSetScoreThreshold(0.32f);
131                     engine.setSetScoreThreshold(0.04f);
132                     
133                     if(cline.hasOption("k")){
134                         engine.setK(Byte.parseByte(cline.getOptionValue("k")));
135                     }
136                     if(cline.hasOption("r")){
137                         engine.setR(Short.parseShort(cline.getOptionValue("r")));
138                     }
139                     
140                     if(cline.hasOption("n")){
141                         engine.setN(Short.parseShort(cline.getOptionValue("n")));
142                     }
143                     if(cline.hasOption("validate")){
144                         engine.setValidate(true);
145                     }
146                     if(cline.hasOption("msetT")){
147                         engine.setMSetScoreThreshold(Float.parseFloat(cline.getOptionValue("msetT")));
148                     }
149                     if(cline.hasOption("setT")){
150                         engine.setSetScoreThreshold(Float.parseFloat(cline.getOptionValue("setT")));
151                     }
152                     engine.search(input);
153                 }else{
154                     throw new IllegalArgumentException("Operation mode is missing. Accepted values: search, load, learn");
155                 }
156             }
157             logger.info("For great justice!");
158         } catch (final ParseException exp) {
159             logger.fatal("Argument parsing failed args: "
160                     + Arrays.toString(args), exp);
161             returnValue = 84;
162         } catch (final HelpException exp) {
163             // no problem, we just display the help and quit
164             logger.debug("Should have shown the help msg");
165         } catch (final Exception e) {
166             logger.fatal("Exception caught", e);
167             returnValue = 83;
168             
169         } 
170         if(engine != null){
171             engine.close();
172         }
173         
174         LogManager.shutdown();
175         System.exit(returnValue);
176     }
177 
178     /**
179      * Initializes the command line definition. Here we define all the command
180      * line options to be received by the program.
181      * @return The options of the program.
182      */
183     public static Options initCommandLine() {
184 
185         final Option in = OptionBuilder
186                 .withArgName("dir")
187                 .hasArg()
188                 .isRequired(false)
189                 .withDescription(
190                         "Input directory where fragments (or directories with fragments) are located")
191                 .create("input");
192 
193         final Option search = new Option(
194                 "search",
195                 "Enables search mode. The n most similar programs will be returned for the given inputs");
196         search.setRequired(false);
197 
198         final Option load = new Option("load",
199                 "Enables the loading of data in the database. The input option is required");
200         search.setRequired(false);
201 
202         final Option freeze = new Option(
203                 "freeze",
204                 "OBSearch 'Leans' the database so that queries can be performed faster. This operation must be executed once and it must be executed before searching for license violations!");
205         search.setRequired(false);
206         
207         final Option validate = new Option(
208                 "validate",
209                 "Used to generate statistics of the quality of the results Furia-chan gives. Assumes that input contains applications whose names (folder names) correspond to names of files in the database");
210         search.setRequired(false);
211 
212         final Option db = OptionBuilder.withArgName("dir").hasArg().isRequired(
213                 true).withDescription("Directory in which the DB is located")
214                 .create("db");
215 
216         final Option r = OptionBuilder.withArgName("#").hasArg().isRequired(
217                 false).withDescription(
218                 "Range to use. Only useful in search mode").create("r");
219         final Option k = OptionBuilder.withArgName("#").hasArg().isRequired(
220                 false).withDescription("k for the nearest neighbor search")
221                 .create("k");
222         final Option n = OptionBuilder.withArgName("#").hasArg().isRequired(
223                 false).withDescription(
224                 "Retrieve the top n closest programs only").create("n");
225         
226         final Option msetT = OptionBuilder.withArgName("#").hasArg().isRequired(
227                 false).withDescription(
228                 "Multi-set score threshold").create("msetT");
229         
230         final Option setT = OptionBuilder.withArgName("#").hasArg().isRequired(
231                 false).withDescription(
232                 "Set score threshold").create("setT");
233 
234         Options options = new Options();
235         options.addOption(in);
236         options.addOption(db);
237         options.addOption(n);
238         options.addOption(k);
239         options.addOption(r);
240         options.addOption(freeze);
241         options.addOption(search);
242         options.addOption(load);
243         options.addOption(validate);
244         options.addOption(msetT);
245         options.addOption(setT);
246         return options;
247     }
248 
249 }