Arkanjo 0.1
A tool for finding duplicated functions in codebases
Loading...
Searching...
No Matches
similarity_table.hpp
Go to the documentation of this file.
1
13#ifndef SIMILARITY_TABLE_HPP
14#define SIMILARITY_TABLE_HPP
15
16#include <string>
17#include <vector>
18#include <map>
19#include <utility>
20#include <tuple>
21#include <fstream>
22
23#include "path.hpp"
24#include "utils.hpp"
25#include "function.hpp"
26using namespace std;
27
35 private:
// Configuration values. NOTE(review): despite the constant-style names these
// are ordinary per-instance, non-const members with default initializers.
// Relative path of the similarity table file — presumably the file that
// read_file_table() consumes; confirm in the implementation.
36 string SIMILARITY_TABLE_FILE_NAME = "tmp/output_parsed.txt";
// Initialized to 100.00; presumably the threshold used when none is supplied — confirm.
37 double DEFAULT_SIMILARITY = 100.00;
// Initialized to 1e-6; presumably a tolerance for floating-point comparisons — confirm.
38 double EPS_ERROR_MARGIN = 1e-6;
// Upper and lower values of the similarity scale as declared here (100.00 and 0.00).
39 double MAXIMUM_SIMILARITY = 100.00;
40 double MINIMUM_SIMILARITY = 0.00;
41
// Current similarity threshold (not initialized in this declaration).
42 double similarity_threshold;
// Paths known to the table.
43 vector<Path> paths;
// Maps a Path to an int — presumably its index into `paths`; confirm.
44 map<Path,int> path_id;
// Per-entry lists of (int, double) pairs — presumably an adjacency list of
// (neighbour path id, similarity); confirm in the implementation.
45 vector<vector<pair<int,double>>> similarity_graph;
// Maps a pair of ints to a double — presumably (path id, path id) -> similarity; confirm.
46 map<pair<int,int>,double> similarity_table;
47
// Takes a Path by value and returns an int — presumably the id associated
// with that path in `path_id`; confirm in the implementation.
53 int find_id_path(Path path);
54
// Reads from the given input file stream — presumably a single comparison
// record of the similarity table; confirm in the implementation.
59 void read_comparation(ifstream &table_file);
60
// Reads from the given input file stream — presumably the whole similarity
// table; confirm in the implementation.
65 void read_file_table(ifstream &table_file);
66
// Takes no arguments and returns nothing — presumably populates the table's
// members from the table file; confirm in the implementation.
70 void init_similarity_table();
71
// Returns a bool for the given similarity value — presumably whether it
// meets `similarity_threshold`; confirm the exact comparison in the implementation.
77 bool is_above_threshold(double similarity);
78
// Takes path pairs by value and returns (int, Path, Path) tuples — presumably
// the int is the line-number key used for ordering; confirm in the implementation.
84 vector<tuple<int,Path,Path>> sort_pairs_by_line_number(vector<pair<Path,Path>> similar_path_pairs);
85
86 public:
// Constructs the table with a caller-supplied similarity threshold.
// NOTE(review): single-argument and not `explicit`, so a double converts
// implicitly to Similarity_Table — confirm this is intended.
91 Similarity_Table(double _similarity_threshold);
92
97
// Updates the similarity threshold to new_similarity_threshold.
102 void update_similarity(double new_similarity_threshold);
103
// Gets the similarity between two paths, returned as a double.
110 double get_similarity(Path path1, Path path2);
111
// Checks if two paths are similar.
// NOTE(review): declared to return double although it is described as a
// yes/no check — confirm whether bool was intended before relying on the value.
118 double is_similar(Path path1, Path path2);
119
// Gets the list of all known paths (returned by value).
124 vector<Path> get_path_list();
125
// Gets the paths that are similar to the given reference path.
131 vector<Path> get_similar_path_to_the_reference(Path reference);
132
// Gets all similar path pairs together with their scores, sorted.
// Each tuple is (similarity, path, path).
137 vector<tuple<double,Path,Path>> get_all_path_pairs_and_similarity_sorted_by_similarity();
138
// Gets all similar path pairs, sorted by similarity.
143 vector<pair<Path,Path>> get_all_similar_path_pairs_sorted_by_similarity();
144
// Gets all similar path pairs, sorted by line count.
149 vector<pair<Path,Path>> get_all_similar_path_pairs_sorted_by_line_number();
150};
151
152#endif
Path manipulation class for tool-specific directory structure.
Definition path.hpp:27
Manages and analyzes function similarity relationships.
vector< pair< Path, Path > > get_all_similar_path_pairs_sorted_by_line_number()
Gets all similar path pairs, sorted by line count.
void update_similarity(double new_similarity_threshold)
Updates similarity threshold.
vector< Path > get_path_list()
Gets list of all known paths.
vector< Path > get_similar_path_to_the_reference(Path reference)
Gets paths similar to reference path.
double is_similar(Path path1, Path path2)
Checks if two paths are similar.
vector< tuple< double, Path, Path > > get_all_path_pairs_and_similarity_sorted_by_similarity()
Gets all similar path pairs with scores, sorted.
Similarity_Table()
Constructs with default similarity threshold.
double get_similarity(Path path1, Path path2)
Gets similarity between two paths.
vector< pair< Path, Path > > get_all_similar_path_pairs_sorted_by_similarity()
Gets all similar path pairs, sorted by similarity.
Function abstraction for temporary codebase.
Definition json.hpp:5678
Path abstraction for temporary codebase.
Defines utility functions used across all files.