Arkanjo 0.1
A tool for finding duplicated functions in codebases
Loading...
Searching...
No Matches
big_clone_tailor_evaluator.cpp
Go to the documentation of this file.
2
3void Big_Clone_Tailor_Evaluator::read_clone_labels(){
4 count_of_samples_by_type = vector<int>(NUMBER_OF_TYPES);
5 vector<string> content = Utils::read_file_generic(CLONE_LABELS_FILE_PATH);
6 for(auto line : content){
7 vector<string> tokens = Utils::split_string(line,',');
8 if(int(tokens.size()) < 4){
9 continue;
10 }
11 int id0 = stoi(tokens[0]);
12 int id1 = stoi(tokens[1]);
13 int type = stoi(tokens[3]);
14 if(id0 > id1){
15 swap(id0,id1);
16 }
17 pair<int,int> aux = {id0,id1};
18 id_pair_to_type[aux] = type;
19 count_of_samples_by_type[type] += 1;
20 }
21}
22
23int Big_Clone_Tailor_Evaluator::path_to_id(Path path){
24 string relative_path = path.build_relative_path();
25 vector<string> tokens = Utils::split_string(relative_path,'/');
26 string file_name = tokens.back();
27 for(int i = 0; i < int(EXTENSION.size()); i++){
28 file_name.pop_back();
29 }
30 return stoi(file_name);
31}
32
33vector<tuple<double,int,int>> Big_Clone_Tailor_Evaluator::similar_path_pairs_formated_with_id(){
34 auto similar_path_pairs = similarity_table->get_all_path_pairs_and_similarity_sorted_by_similarity();
35 vector<tuple<double,int,int>> ret;
36 for(auto [similarity,path0,path1] : similar_path_pairs){
37 int id0 = path_to_id(path0);
38 int id1 = path_to_id(path1);
39 if(id0 > id1){
40 swap(id0,id1);
41 }
42 ret.push_back({similarity,id0,id1});
43 }
44 return ret;
45}
46
47bool Big_Clone_Tailor_Evaluator::is_relevant_pair(int id0, int id1){
48 pair<int,int> ids = {id0,id1};
49 return id_pair_to_type.find(ids) != id_pair_to_type.end();
50}
51
52set<pair<int,int>> Big_Clone_Tailor_Evaluator::filter_similar_id_pairs_only_relevant_ones(
53 vector<pair<int,int>> similar_id_pairs){
54 set<pair<int,int>> ret;
55 for(auto [id0,id1] : similar_id_pairs){
56 if(is_relevant_pair(id0,id1)){
57 ret.insert({id0,id1});
58 }
59 }
60 return ret;
61}
62
63vector<pair<int,int>> Big_Clone_Tailor_Evaluator::filter_similar_path_pairs_by_similarity(
64 vector<tuple<double,int,int>> similar_id_pairs,
65 double minimum_similarity){
66 vector<pair<int,int>> ret;
67 for(auto [similarity,id0,id1] : similar_id_pairs){
68 if(similarity >= minimum_similarity){
69 ret.push_back({id0,id1});
70 }
71 }
72 return ret;
73}
74
75vector<int> Big_Clone_Tailor_Evaluator::build_frequency_corrected_guessed_by_type(
76 vector<pair<int,int>> similar_id_pairs){
77 set<pair<int,int>> similar_id_pairs_set = filter_similar_id_pairs_only_relevant_ones(similar_id_pairs);
78 vector<int> frequency(NUMBER_OF_TYPES);
79 for(auto ids : similar_id_pairs_set){
80 frequency[id_pair_to_type[ids]] += 1;
81 }
82 //for not clone if it is marked as duplicate count is wrong instead of right
83 frequency[NOT_CLONE_TYPE_ID] *= -1;
84 frequency[NOT_CLONE_TYPE_ID] += count_of_samples_by_type[NOT_CLONE_TYPE_ID];
85 return frequency;
86}
87
88double Big_Clone_Tailor_Evaluator::calc_recall(vector<int> frequency, int type){
89 double TP = frequency[type];
90 double FN = count_of_samples_by_type[type] - frequency[type];
91 double recall = TP/(TP+FN);
92 return recall;
93}
94
95void Big_Clone_Tailor_Evaluator::print_recall_per_type(vector<int> frequency){
96 cout << RECALL_PER_TYPE_PRINT << '\n';
97 for(int type = 0; type < NUMBER_OF_TYPES; type++){
98 double recall = calc_recall(frequency,type);
99 cout << ID_TO_TYPE_LABEL[type] << ' ';
100 cout << fixed << setprecision(2) << recall << '\n';
101 }
102}
103
104void Big_Clone_Tailor_Evaluator::evaluate(double minimum_similarity){
105 vector<tuple<double,int,int>> similar_id_pairs_similarity = similar_path_pairs_formated_with_id();
106 vector<pair<int,int>> similar_id_pairs = filter_similar_path_pairs_by_similarity(
107 similar_id_pairs_similarity,
108 minimum_similarity);
109 vector<int> frequency = build_frequency_corrected_guessed_by_type(similar_id_pairs);
110 print_recall_per_type(frequency);
111}
112
114 similarity_table = _similarity_table;
115 read_clone_labels();
116 evaluate(MINIMUM_SIMILARITY_TEMP);
117}
BigCloneBench evaluation interface. Provides evaluation metrics compatible with BigCloneBench datase...
Big_Clone_Tailor_Evaluator(Similarity_Table *_similarity_table)
Constructs evaluator with similarity data.
Path manipulation class for tool-specific directory structure.
Definition path.hpp:27
string build_relative_path()
Builds relative path portion.
Definition path.cpp:112
Manages and analyzes function similarity relationships.
vector< tuple< double, Path, Path > > get_all_path_pairs_and_similarity_sorted_by_similarity()
Gets all similar path pairs with scores, sorted.
return ret
Definition sum.c:3
vector< string > split_string(string s, char delimiter)
Splits a string by a delimiter into tokens.
Definition utils.cpp:119
vector< string > read_file_generic(string string_path)
Reads a file line by line into a vector of strings.
Definition utils.cpp:19
NLOHMANN_BASIC_JSON_TPL_DECLARATION void swap(nlohmann::NLOHMANN_BASIC_JSON_TPL &j1, nlohmann::NLOHMANN_BASIC_JSON_TPL &j2) noexcept(//NOLINT(readability-inconsistent-declaration-parameter-name, cert-dcl58-cpp) is_nothrow_move_constructible< nlohmann::NLOHMANN_BASIC_JSON_TPL >::value &&//NOLINT(misc-redundant-expression, cppcoreguidelines-noexcept-swap, performance-noexcept-swap) is_nothrow_move_assignable< nlohmann::NLOHMANN_BASIC_JSON_TPL >::value)
exchanges the values of two JSON objects
Definition json.hpp:25398