// Prompt text asking the user for the path of the project.
// NOTE(review): declared as a mutable string although it reads like a constant;
// the place where it is printed is not visible in this chunk.
39 string PROJECT_PATH_MESSAGE =
"Enter your project path:";
// Prompt text asking the user for the minimum similarity to use.
// NOTE(review): the accepted range/unit of the value is not shown here — confirm
// against the code that reads the answer.
40 string MINIMUM_SIMILARITY_MESSAGE =
"Enter minimum similarity desired on using the tool:";
// File name of the configuration file. It is a bare relative name, so it is
// presumably resolved against the process working directory — TODO confirm.
41 string CONFIG_PATH =
"config.txt";
// Label text for the path of the current preprocess run. Ends with a trailing
// space, so a value is presumably appended right after it — verify at the use site.
42 string PATH_MESSAGE =
"path of the current preprocess: ";
// Label text for the finish time of a run. Ends with a trailing space, so a
// time value is presumably appended right after it — verify at the use site.
43 string TIME_MESSAGE =
"Finished time: ";
// Status text announcing that preprocessing is starting.
46 string INITIAL_MESSAGE =
"Initiating Preprocessing";
// Status text for the codebase-reading stage, which the text warns may be slow.
// NOTE(review): the name suggests it belongs to a "breaker" step; that step is
// not visible in this chunk — confirm.
47 string BREAKER_MESSAGE =
"Reading codebase... (this may take a while)";
// Status text for the duplication-finding stage, which the text warns may be slow.
48 string DUPLICATION_MESSAGE =
"Finding duplication in the codebase... (this may take a while)";
// Status text for the stage in which results are saved.
49 string SAVING_MESSAGE =
"Saving results...";
// Status text announcing that preprocessing has finished.
50 string END_MESSAGE =
"Finished preprocessing";
// Header line of the duplication-finder menu: asks the user to enter the number
// of the technique to use. The options themselves are in the _TYPE_2/_TYPE_3 texts.
53 string MESSAGE_DUPLICATION_FINDER_TYPE_1 =
"Enter the number of the duplication finder technique you want to use:";
// Menu line for option 1 of the duplication-finder menu (NLP text similarity
// using gensim, per the text). Despite the _TYPE_2 suffix this is option "1".
54 string MESSAGE_DUPLICATION_FINDER_TYPE_2 =
"1) NLP text similarity using gensim";
// Menu line for option 2 of the duplication-finder menu (proportion of equal
// lines via the diff command, per the text). Despite the _TYPE_3 suffix this is
// option "2".
55 string MESSAGE_DUPLICATION_FINDER_TYPE_3 =
"2) Count proportion of equal lines using diff command";
// Error text for an invalid duplication-finder choice; states that only '1' and
// '2' are valid and that the program is stopping.
// NOTE(review): whether the program actually terminates is decided by the code
// that emits this text, which is not visible here.
56 string INVALID_CODE_DUPLICATION_FINDER =
"Valid options are '1' or '2' only. Stopping Program...";
// Reads the run parameters and returns them as a (string, double, bool) tuple.
// NOTE(review): the element types match preprocess()'s parameters (path,
// similarity, use_duplication_finder_by_tool), so the tuple presumably carries
// those three values in that order — confirm against the definition, which is
// not visible in this chunk.
65 tuple<string,double,bool> read_parameters();
// Saves the parameters of the current run.
//   path - a path string, taken by value.
// NOTE(review): the definition is not visible here; whether `path` is the
// project path or an output location, and where the data is written (possibly
// the CONFIG_PATH file), must be confirmed against the implementation.
71 void save_current_run_params(
string path);
// Runs the preprocessing for a project.
//   path                            - path string, presumably the project to
//                                     process — TODO confirm.
//   similarity                      - similarity threshold; presumably the
//                                     minimum similarity asked for by
//                                     MINIMUM_SIMILARITY_MESSAGE — TODO confirm
//                                     range/unit.
//   use_duplication_finder_by_tool  - selects which duplication finder is used;
//                                     the mapping of true/false to the two menu
//                                     options is not visible here — verify.
// The definition lives outside this chunk, so the stages performed and their
// side effects are not documented here.
79 void preprocess(
string path,
double similarity,
bool use_duplication_finder_by_tool);
// Constructs a Preprocessor.
//   force_preprocess - presumably forces a new preprocessing run even when a
//                      previous one could be reused — TODO confirm against the
//                      definition.
//   path             - path string, presumably the project path — TODO confirm.
//   similarity       - similarity threshold, presumably forwarded to
//                      preprocess() — TODO confirm.
// The constructor body is not visible in this chunk; whether it starts work
// immediately or only stores its arguments cannot be told from here.
95 Preprocessor(
bool force_preprocess,
string path,
double similarity);