/**
 * @brief Records the labelled type of each id pair and tallies labels per type.
 *
 * For every processed line, tokens 0 and 1 are parsed as the two ids of a pair
 * and token 3 as its type; the pair is stored in id_pair_to_type and the
 * matching counter in count_of_samples_by_type is incremented.
 *
 * NOTE(review): the line numbers embedded in this listing are not contiguous
 * (4 -> 6 -> 8 -> 11, 13 -> 17), so statements are missing here. `content` and
 * `tokens` are used below but their definitions are not visible; presumably
 * `content` holds the lines of the label file and `tokens` the split fields of
 * `line` -- confirm against the complete file.
 */
3void BigCloneTailorEvaluator::read_clone_labels() {
// Start from NUMBER_OF_TYPES counters, all value-initialised to 0.
4 count_of_samples_by_type = vector<int>(NUMBER_OF_TYPES);
// `auto line` copies each element of `content` on every iteration.
6 for (
auto line : content) {
// Guard for lines with fewer than 4 tokens; what the branch does is not
// visible in this listing (presumably it skips the line -- TODO confirm).
8 if (
int(tokens.size()) < 4) {
// stoi throws if a token is not a valid integer; token 2 is not read by any
// visible line.
11 int id0 = stoi(tokens[0]);
12 int id1 = stoi(tokens[1]);
13 int type = stoi(tokens[3]);
17 pair<int, int> aux = {id0, id1};
// The pair is keyed exactly in (id0, id1) order as read.
18 id_pair_to_type[aux] = type;
// NOTE(review): `type` is used as an index with no visible check that it lies
// in [0, NUMBER_OF_TYPES); an out-of-range label would index past the vector.
19 count_of_samples_by_type[type] += 1;
/**
 * @brief Converts a Path into an integer id by parsing a file name with stoi.
 *
 * @param path Path whose id is wanted.
 * @return The integer parsed from `file_name`.
 *
 * NOTE(review): lines are missing from this listing (23 -> 26, 27 -> 30).
 * `tokens` is not defined in the visible lines; presumably it is the path
 * split into components, making `tokens.back()` the file name -- confirm.
 */
23int BigCloneTailorEvaluator::path_to_id(
Path path) {
// Last element of `tokens`; calling back() on an empty container is undefined.
26 string file_name = tokens.back();
// Runs once per character of EXTENSION. The loop body is not visible here;
// presumably it strips the extension from file_name -- TODO confirm.
27 for (
int i = 0; i < int(EXTENSION.size()); i++) {
// stoi throws if file_name does not start with a valid integer.
30 return stoi(file_name);
/**
 * @brief Rewrites each (similarity, path0, path1) entry of similar_path_pairs
 *        as (similarity, id0, id1), using path_to_id for both paths.
 *
 * NOTE(review): lines 39-41 and everything after line 42 are missing from this
 * listing, so any processing between the id lookups and the push_back, and the
 * return statement, cannot be seen here. Presumably `ret` is returned.
 */
33vector<tuple<double, int, int>> BigCloneTailorEvaluator::similar_path_pairs_formated_with_id() {
35 vector<tuple<double, int, int>> ret;
// The structured binding is by value, so each tuple is copied per iteration.
36 for (
auto [similarity, path0, path1] : similar_path_pairs) {
37 int id0 = path_to_id(path0);
38 int id1 = path_to_id(path1);
// The similarity score is carried over unchanged.
42 ret.push_back({similarity, id0, id1});
/**
 * @brief Tells whether the pair (id0, id1) has an entry in id_pair_to_type.
 *
 * @param id0 First id of the pair.
 * @param id1 Second id of the pair.
 * @return true when the key {id0, id1} is found, false otherwise.
 */
47bool BigCloneTailorEvaluator::is_relevant_pair(
int id0,
int id1) {
// The lookup uses the ids in the order given; the swapped pair {id1, id0} is
// not tried.
48 pair<int, int> ids = {id0, id1};
// find() does not insert, so a miss leaves id_pair_to_type unchanged.
49 return id_pair_to_type.find(ids) != id_pair_to_type.end();
/**
 * @brief Collects the id pairs for which is_relevant_pair() is true.
 *
 * @param similar_id_pairs Candidate (id0, id1) pairs; taken by value.
 *
 * The result is a std::set, so a pair that appears several times in the input
 * is stored once.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably `ret` is returned.
 */
52set<pair<int, int>> BigCloneTailorEvaluator::filter_similar_id_pairs_only_relevant_ones(
53 vector<pair<int, int>> similar_id_pairs) {
54 set<pair<int, int>> ret;
55 for (
auto [id0, id1] : similar_id_pairs) {
// Keep only pairs that is_relevant_pair() accepts.
56 if (is_relevant_pair(id0, id1)) {
57 ret.insert({id0, id1});
/**
 * @brief Keeps the id pairs whose similarity reaches a threshold and drops the
 *        score.
 *
 * @param similar_id_pairs   (similarity, id0, id1) tuples; taken by value.
 * @param minimum_similarity Inclusive lower bound on similarity.
 *
 * Kept pairs are appended in input order.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably `ret` is returned.
 */
63vector<pair<int, int>> BigCloneTailorEvaluator::filter_similar_path_pairs_by_similarity(
64 vector<tuple<double, int, int>> similar_id_pairs,
65 double minimum_similarity) {
66 vector<pair<int, int>> ret;
67 for (
auto [similarity, id0, id1] : similar_id_pairs) {
// `>=` makes the threshold inclusive: a pair exactly at the minimum is kept.
68 if (similarity >= minimum_similarity) {
69 ret.push_back({id0, id1});
/**
 * @brief Builds a per-type counter from the reported similar id pairs.
 *
 * @param similar_id_pairs Id pairs reported as similar; taken by value.
 *
 * Steps visible here:
 *  1. reduce the input to the distinct pairs accepted by
 *     filter_similar_id_pairs_only_relevant_ones();
 *  2. count those pairs per type via id_pair_to_type;
 *  3. for NOT_CLONE_TYPE_ID only, replace the count c with
 *     count_of_samples_by_type[NOT_CLONE_TYPE_ID] - c.
 *
 * NOTE(review): step 3 presumably reflects that a non-clone pair counts as
 * correctly handled when it was NOT reported as similar -- confirm intent.
 * The return statement is not visible in this listing; presumably `frequency`
 * is returned.
 */
75vector<int> BigCloneTailorEvaluator::build_frequency_corrected_guessed_by_type(
76 vector<pair<int, int>> similar_id_pairs) {
77 set<pair<int, int>> similar_id_pairs_set = filter_similar_id_pairs_only_relevant_ones(similar_id_pairs);
// NUMBER_OF_TYPES counters, all starting at 0.
78 vector<int> frequency(NUMBER_OF_TYPES);
79 for (
auto ids : similar_id_pairs_set) {
// Every pair in the set passed the relevance filter above, so its key already
// exists in id_pair_to_type.
80 frequency[id_pair_to_type[ids]] += 1;
// Negate, then add the total: frequency becomes (total - reported).
83 frequency[NOT_CLONE_TYPE_ID] *= -1;
84 frequency[NOT_CLONE_TYPE_ID] += count_of_samples_by_type[NOT_CLONE_TYPE_ID];
/**
 * @brief Computes recall = TP / (TP + FN) for one type.
 *
 * @param frequency Per-type counters; frequency[type] is used as TP.
 * @param type      Index of the type to evaluate.
 *
 * FN is count_of_samples_by_type[type] - frequency[type], so the denominator
 * TP + FN equals count_of_samples_by_type[type].
 * NOTE(review): when that count is 0 the division is 0.0 / 0.0, which yields
 * NaN with IEEE-754 doubles; no guard is visible. The return statement is also
 * not visible in this listing; presumably `recall` is returned.
 */
88double BigCloneTailorEvaluator::calc_recall(vector<int> frequency,
int type) {
// The counters are ints; storing them in doubles makes the division below
// floating-point.
89 double TP = frequency[type];
90 double FN = count_of_samples_by_type[type] - frequency[type];
91 double recall = TP / (TP + FN);
/**
 * @brief Prints the recall of every type to standard output.
 *
 * @param frequency Per-type counters, passed on to calc_recall().
 *
 * Output: RECALL_PER_TYPE_PRINT on its own line, then one line per type in
 * index order: ID_TO_TYPE_LABEL[type], a space, and the recall with two
 * decimals.
 */
95void BigCloneTailorEvaluator::print_recall_per_type(vector<int> frequency) {
96 cout << RECALL_PER_TYPE_PRINT <<
'\n';
97 for (
int type = 0; type < NUMBER_OF_TYPES; type++) {
98 double recall = calc_recall(frequency, type);
99 cout << ID_TO_TYPE_LABEL[type] <<
' ';
// fixed and setprecision(2) remain set on cout after this function returns.
100 cout << fixed << setprecision(2) << recall <<
'\n';
/**
 * @brief Runs the evaluation steps and prints recall per type.
 *
 * @param minimum_similarity Similarity threshold for the evaluation.
 *
 * Visible steps: convert the similar path pairs to id form, filter them with
 * filter_similar_path_pairs_by_similarity(), build the per-type counters, and
 * print the recall for each type.
 *
 * NOTE(review): line 108 is missing from this listing, so the remaining
 * argument(s) and the end of the filter call are not visible; presumably
 * `minimum_similarity` is passed there -- confirm.
 */
104void BigCloneTailorEvaluator::evaluate(
double minimum_similarity) {
105 vector<tuple<double, int, int>> similar_id_pairs_similarity = similar_path_pairs_formated_with_id();
106 vector<pair<int, int>> similar_id_pairs = filter_similar_path_pairs_by_similarity(
107 similar_id_pairs_similarity,
109 vector<int> frequency = build_frequency_corrected_guessed_by_type(similar_id_pairs);
110 print_recall_per_type(frequency);
/**
 * @brief Stores the similarity table pointer and runs the evaluation.
 *
 * @param _similarity_table Pointer copied into the `similarity_table` member.
 *
 * NOTE(review): lines 115-122 are missing from this listing, so whatever set-up
 * happens between storing the pointer and calling evaluate() is not visible
 * here (presumably loading the similar path pairs and the clone labels --
 * confirm against the complete file).
 */
113BigCloneTailorEvaluator::BigCloneTailorEvaluator(
Similarity_Table* _similarity_table) {
// Only the pointer value is copied; no null check is visible.
114 similarity_table = _similarity_table;
// Construction prints results as a side effect, using the
// MINIMUM_SIMILARITY_TEMP threshold.
123 evaluate(MINIMUM_SIMILARITY_TEMP);
BigCloneBench evaluation interface. Provides evaluation metrics compatible with the BigCloneBench dataset...
bool validate(const ParsedOptions &options) override
Validates the already-parsed arguments.
bool run(const ParsedOptions &options) override
Handles BigCloneBench evaluation command.
Path manipulation class for tool-specific directory structure.
std::string build_relative_path() const
Builds relative path portion.
Represents a similarity graph between functions (paths).
std::vector< std::tuple< double, Path, Path > > get_all_path_pairs_and_similarity_sorted_by_similarity()
Gets all similar path pairs with scores, sorted.
std::vector< std::string > split_string(const std::string &s, char delimiter)
Splits a string by a delimiter into tokens.
std::vector< std::string > read_file_generic(const fs::path &string_path)
Reads a file line by line into a vector of strings.