Arkanjo 0.2
A tool for finding duplicated functions in codebases
Loading...
Searching...
No Matches
big_clone_tailor_evaluator.cpp
Go to the documentation of this file.
2
3void BigCloneTailorEvaluator::read_clone_labels() {
4 count_of_samples_by_type = vector<int>(NUMBER_OF_TYPES);
5 vector<string> content = Utils::read_file_generic(CLONE_LABELS_FILE_PATH);
6 for (auto line : content) {
7 vector<string> tokens = Utils::split_string(line, ',');
8 if (int(tokens.size()) < 4) {
9 continue;
10 }
11 int id0 = stoi(tokens[0]);
12 int id1 = stoi(tokens[1]);
13 int type = stoi(tokens[3]);
14 if (id0 > id1) {
15 swap(id0, id1);
16 }
17 pair<int, int> aux = {id0, id1};
18 id_pair_to_type[aux] = type;
19 count_of_samples_by_type[type] += 1;
20 }
21}
22
23int BigCloneTailorEvaluator::path_to_id(Path path) {
24 string relative_path = path.build_relative_path();
25 vector<string> tokens = Utils::split_string(relative_path, '/');
26 string file_name = tokens.back();
27 for (int i = 0; i < int(EXTENSION.size()); i++) {
28 file_name.pop_back();
29 }
30 return stoi(file_name);
31}
32
33vector<tuple<double, int, int>> BigCloneTailorEvaluator::similar_path_pairs_formated_with_id() {
34 auto similar_path_pairs = similarity_table->get_all_path_pairs_and_similarity_sorted_by_similarity();
35 vector<tuple<double, int, int>> ret;
36 for (auto [similarity, path0, path1] : similar_path_pairs) {
37 int id0 = path_to_id(path0);
38 int id1 = path_to_id(path1);
39 if (id0 > id1) {
40 swap(id0, id1);
41 }
42 ret.push_back({similarity, id0, id1});
43 }
44 return ret;
45}
46
47bool BigCloneTailorEvaluator::is_relevant_pair(int id0, int id1) {
48 pair<int, int> ids = {id0, id1};
49 return id_pair_to_type.find(ids) != id_pair_to_type.end();
50}
51
52set<pair<int, int>> BigCloneTailorEvaluator::filter_similar_id_pairs_only_relevant_ones(
53 vector<pair<int, int>> similar_id_pairs) {
54 set<pair<int, int>> ret;
55 for (auto [id0, id1] : similar_id_pairs) {
56 if (is_relevant_pair(id0, id1)) {
57 ret.insert({id0, id1});
58 }
59 }
60 return ret;
61}
62
63vector<pair<int, int>> BigCloneTailorEvaluator::filter_similar_path_pairs_by_similarity(
64 vector<tuple<double, int, int>> similar_id_pairs,
65 double minimum_similarity) {
66 vector<pair<int, int>> ret;
67 for (auto [similarity, id0, id1] : similar_id_pairs) {
68 if (similarity >= minimum_similarity) {
69 ret.push_back({id0, id1});
70 }
71 }
72 return ret;
73}
74
75vector<int> BigCloneTailorEvaluator::build_frequency_corrected_guessed_by_type(
76 vector<pair<int, int>> similar_id_pairs) {
77 set<pair<int, int>> similar_id_pairs_set = filter_similar_id_pairs_only_relevant_ones(similar_id_pairs);
78 vector<int> frequency(NUMBER_OF_TYPES);
79 for (auto ids : similar_id_pairs_set) {
80 frequency[id_pair_to_type[ids]] += 1;
81 }
82 // for not clone if it is marked as duplicate count is wrong instead of right
83 frequency[NOT_CLONE_TYPE_ID] *= -1;
84 frequency[NOT_CLONE_TYPE_ID] += count_of_samples_by_type[NOT_CLONE_TYPE_ID];
85 return frequency;
86}
87
88double BigCloneTailorEvaluator::calc_recall(vector<int> frequency, int type) {
89 double TP = frequency[type];
90 double FN = count_of_samples_by_type[type] - frequency[type];
91 double recall = TP / (TP + FN);
92 return recall;
93}
94
95void BigCloneTailorEvaluator::print_recall_per_type(vector<int> frequency) {
96 cout << RECALL_PER_TYPE_PRINT << '\n';
97 for (int type = 0; type < NUMBER_OF_TYPES; type++) {
98 double recall = calc_recall(frequency, type);
99 cout << ID_TO_TYPE_LABEL[type] << ' ';
100 cout << fixed << setprecision(2) << recall << '\n';
101 }
102}
103
104void BigCloneTailorEvaluator::evaluate(double minimum_similarity) {
105 vector<tuple<double, int, int>> similar_id_pairs_similarity = similar_path_pairs_formated_with_id();
106 vector<pair<int, int>> similar_id_pairs = filter_similar_path_pairs_by_similarity(
107 similar_id_pairs_similarity,
108 minimum_similarity);
109 vector<int> frequency = build_frequency_corrected_guessed_by_type(similar_id_pairs);
110 print_recall_per_type(frequency);
111}
112
113BigCloneTailorEvaluator::BigCloneTailorEvaluator(Similarity_Table* _similarity_table) {
114 similarity_table = _similarity_table;
115}
116
117bool BigCloneTailorEvaluator::validate([[maybe_unused]] const ParsedOptions& options) {
118 return true;
119}
120
121bool BigCloneTailorEvaluator::run([[maybe_unused]] const ParsedOptions& options) {
122 read_clone_labels();
123 evaluate(MINIMUM_SIMILARITY_TEMP);
124
125 return true;
126}
BigCloneBench evaluation interface. Provides evaluation metrics compatible with the BigCloneBench dataset...
bool validate(const ParsedOptions &options) override
Validate the arguments already analyzed.
bool run(const ParsedOptions &options) override
Handles BigCloneBench evaluation command.
Path manipulation class for tool-specific directory structure.
Definition path.hpp:27
std::string build_relative_path() const
Builds relative path portion.
Definition path.cpp:101
Represents a similarity graph between functions (paths).
std::vector< std::tuple< double, Path, Path > > get_all_path_pairs_and_similarity_sorted_by_similarity()
Gets all similar path pairs with scores, sorted.
std::vector< std::string > split_string(const std::string &s, char delimiter)
Splits a string by a delimiter into tokens.
Definition utils.cpp:71
std::vector< std::string > read_file_generic(const fs::path &string_path)
Reads a file line by line into a vector of strings.
Definition utils.cpp:13