Arkanjo 0.1
A tool for finding duplicated functions in codebases
Loading...
Searching...
No Matches
similarity_table.cpp
Go to the documentation of this file.
2
3int Similarity_Table::find_id_path(Path path){
4 if(path_id.find(path) == path_id.end()){
5 path_id[path] = paths.size();
6 paths.push_back(path);
7 vector<pair<int,double>> empty_vec;
8 similarity_graph.push_back(empty_vec);
9 }
10 return path_id[path];
11}
12
13void Similarity_Table::read_comparation(ifstream &table_file){
14 string string_path1, string_path2;
15 double similarity;
16 table_file >> string_path1 >> string_path2 >> similarity;
17
18 int id1 = find_id_path(Path(string_path1));
19 int id2 = find_id_path(Path(string_path2));
20
21 if(id1 > id2){
22 swap(id1,id2);
23 }
24
25 similarity_graph[id1].push_back(make_pair(id2,similarity));
26 similarity_graph[id2].push_back(make_pair(id1,similarity));
27 similarity_table[make_pair(id1,id2)] = similarity;
28}
29
30void Similarity_Table::read_file_table(ifstream &table_file){
31 int number_comparations;
32 table_file >> number_comparations;
33 for(int i = 0; i < number_comparations; i++){
34 read_comparation(table_file);
35 }
36}
37
38void Similarity_Table::init_similarity_table(){
39 std::ifstream table_file;
40 table_file.open(SIMILARITY_TABLE_FILE_NAME);
41 Utils::ensure_file_is_open(table_file,SIMILARITY_TABLE_FILE_NAME);
42
43 read_file_table(table_file);
44
45 table_file.close();
46}
47
48Similarity_Table::Similarity_Table(double _similarity_threshold){
49 similarity_threshold= _similarity_threshold;
50 init_similarity_table();
51}
52
54 similarity_threshold = DEFAULT_SIMILARITY;
55 init_similarity_table();
56}
57
58void Similarity_Table::update_similarity(double new_similarity_threshold){
59 similarity_threshold = new_similarity_threshold;
60}
61
63 int id1 = find_id_path(path1);
64 int id2 = find_id_path(path2);
65
66 if(id1 == id2){
67 return MAXIMUM_SIMILARITY;
68 }
69 if(id1 > id2){
70 swap(id1,id2);
71 }
72 pair<int,int> aux = make_pair(id1,id2);
73 if(similarity_table.find(aux) != similarity_table.end()){
74 return similarity_table[aux];
75 }
76 return MINIMUM_SIMILARITY;
77}
78
79bool Similarity_Table::is_above_threshold(double similarity){
80 return similarity_threshold <= similarity + EPS_ERROR_MARGIN;
81}
82
84 double similarity = get_similarity(path1, path2);
85 return is_above_threshold(similarity);
86}
87
89 return paths;
90}
91
93 int id = find_id_path(reference);
94 vector<Path> ret;
95 for(auto [id,similarity] : similarity_graph[id]){
96 if(is_above_threshold(similarity)){
97 ret.push_back(paths[id]);
98 }
99 }
100 return ret;
101}
102
104 vector<tuple<double,Path,Path>> similar_path_pairs;
105 for(auto [ ids, similarity] : similarity_table){
106 Path path1 = paths[ids.first];
107 Path path2 = paths[ids.second];
108 if(is_similar(path1,path2)){
109 similar_path_pairs.push_back({similarity,path1,path2});
110 }
111 }
112 sort(similar_path_pairs.rbegin(),similar_path_pairs.rend());
113 return similar_path_pairs;
114}
115
118 vector<pair<Path,Path>> ret;
119 for(auto [similarity,path1,path2] : similar_path_pairs){
120 ret.push_back({path1,path2});
121 }
122 return ret;
123}
124
125vector<tuple<int,Path,Path>> Similarity_Table::sort_pairs_by_line_number(vector<pair<Path,Path>> similar_path_pairs){
126 vector<tuple<int,Path,Path>> similar_path_pairs_with_number_of_lines;
127 for(auto [path1,path2] : similar_path_pairs){
128 Function function(path1);
129 tuple<int,Path,Path> aux = {function.number_of_lines(),path1,path2};
130 similar_path_pairs_with_number_of_lines.push_back(aux);
131 }
132 sort(
133 similar_path_pairs_with_number_of_lines.begin(),
134 similar_path_pairs_with_number_of_lines.end(),
135 [&](tuple<int,Path,Path> pair1, tuple<int,Path,Path> pair2){
136 int number_lines1 = get<0>(pair1);
137 int number_lines2 = get<0>(pair2);
138 return number_lines1 > number_lines2;
139 }
140 );
141 return similar_path_pairs_with_number_of_lines;
142}
143
145 vector<pair<Path,Path>> similar_path_pairs = get_all_similar_path_pairs_sorted_by_similarity();
146
147 vector<tuple<int,Path,Path>> similar_path_pairs_with_number_of_lines =
148 sort_pairs_by_line_number(similar_path_pairs);
149
150 vector<pair<Path,Path>> ret;
151 for(auto [line_number,path1,path2] : similar_path_pairs_with_number_of_lines){
152 ret.push_back({path1,path2});
153 }
154
155 return ret;
156}
Represents a code function with its content and metadata.
Definition function.hpp:33
Path manipulation class for tool-specific directory structure.
Definition path.hpp:27
vector< pair< Path, Path > > get_all_similar_path_pairs_sorted_by_line_number()
Gets all similar path pairs, sorted by line count.
void update_similarity(double new_similarity_threshold)
Updates similarity threshold.
vector< Path > get_path_list()
Gets list of all known paths.
vector< Path > get_similar_path_to_the_reference(Path reference)
Gets paths similar to reference path.
double is_similar(Path path1, Path path2)
Checks if two paths are similar.
vector< tuple< double, Path, Path > > get_all_path_pairs_and_similarity_sorted_by_similarity()
Gets all similar path pairs with scores, sorted.
Similarity_Table()
Constructs with default similarity threshold.
double get_similarity(Path path1, Path path2)
Gets similarity between two paths.
vector< pair< Path, Path > > get_all_similar_path_pairs_sorted_by_similarity()
Gets all similar path pairs, sorted by similarity.
return ret
Definition sum.c:3
void ensure_file_is_open(std::ifstream &file, string file_name)
Ensures that a file stream is successfully opened.
Definition utils.cpp:11
NLOHMANN_BASIC_JSON_TPL_DECLARATION void swap(nlohmann::NLOHMANN_BASIC_JSON_TPL &j1, nlohmann::NLOHMANN_BASIC_JSON_TPL &j2) noexcept(//NOLINT(readability-inconsistent-declaration-parameter-name, cert-dcl58-cpp) is_nothrow_move_constructible< nlohmann::NLOHMANN_BASIC_JSON_TPL >::value &&//NOLINT(misc-redundant-expression, cppcoreguidelines-noexcept-swap, performance-noexcept-swap) is_nothrow_move_assignable< nlohmann::NLOHMANN_BASIC_JSON_TPL >::value)
exchanges the values of two JSON objects
Definition json.hpp:25398
Similarity relationships storage and analysis.