Arkanjo 0.2
A tool for finding duplicated functions in codebases
Loading...
Searching...
No Matches
similarity_explorer.cpp
Go to the documentation of this file.
/*
This file expects the output of parser.cpp.
This code filters that output, printing only the files that are similar to files containing a given pattern passed as a command-line argument.
The code matches every file that has the pattern as a substring, so be careful with duplicates.
*/
6
7#include <algorithm>
8#include <iostream>
9#include <utility>
10
13
15
16int SimilarityExplorer::find_number_pairs_show(int number_pair_found) const {
17 if (limit_on_results == UNLIMITED_RESULTS) {
18 return number_pair_found;
19 }
20 return std::min(limit_on_results, number_pair_found);
21}
22
23bool SimilarityExplorer::match_pattern(const Path& path1, const Path& path2) const {
24 bool match1 = path1.contains_given_pattern(pattern_to_match);
25 bool match2 = path2.contains_given_pattern(pattern_to_match);
26
27 if (both_path_need_to_match_pattern) {
28 return match1 && match2;
29 }
30 return match1 || match2;
31}
32
33int SimilarityExplorer::find_number_lines(const Path& path1) {
34 Function function(path1);
35 function.load();
36 return function.number_of_lines();
37}
38
// Builds the display entry for one pair of similar paths.
// Returns a default-constructed SimilarityExplorerEntry when the pair does not
// pass match_pattern(), or when a result limit is active and processed_results
// has already reached it. Otherwise processed_results is incremented and a
// populated entry is returned.
// NOTE(review): this listing jumps from line 48 to line 51 inside the returned
// initializer, so some of its elements are not visible here; confirm against
// the original file before editing the return statement.
39SimilarityExplorerEntry SimilarityExplorer::process_similar_path_pair(const Path& path1, const Path& path2) {
// Pairs that fail the pattern filter yield an empty entry.
40 if (!match_pattern(path1, path2)) {
41 return {};
42 }
// Once the configured limit has been reached, further pairs also yield an empty entry.
43 if (limit_on_results != UNLIMITED_RESULTS && processed_results >= limit_on_results) {
44 return {};
45 }
// Only pairs that will actually be emitted count toward the limit.
46 processed_results++;
47
48 return {
// Line count is taken from the first path of the pair only.
51 find_number_lines(path1)
52 };
53}
54
55int SimilarityExplorer::find_number_pair_found(const std::vector<std::pair<Path, Path>>& similar_path_pairs) const {
56 int count = 0;
57 for (const auto& [path1, path2] : similar_path_pairs) {
58 if (match_pattern(path1, path2)) {
59 count++;
60 }
61 }
62 return count;
63}
64
65std::vector<std::pair<Path, Path>> SimilarityExplorer::build_similar_path_pairs() {
66 std::vector<std::pair<Path, Path>> similar_path_pairs;
67 if (sorted_by_number_of_duplicated_code) {
68 similar_path_pairs = similarity_table->get_all_similar_path_pairs_sorted_by_line_number();
69 } else {
70 similar_path_pairs = similarity_table->get_all_similar_path_pairs_sorted_by_similarity();
71 }
72 return similar_path_pairs;
73}
74
// Prints the clusters reported by the similarity table, one after another.
// For each cluster a summary line (files, pairs, lines, score) is written,
// followed by a table with one entry per path in the cluster and then an empty
// line. The pattern filter (match_pattern) is not applied in this function.
// NOTE(review): this listing jumps from line 88 to line 90, so one statement
// after the summary line is not visible here; confirm against the original file.
75void SimilarityExplorer::explorer_clusters() {
76 auto clusters_info = similarity_table->get_clusters_info(sorted_by_number_of_duplicated_code);
77
// A positive limit caps the number of clusters shown; zero or a negative
// value shows all of them.
78 int clusters_to_show = limit_on_results > 0
79 ? std::min(limit_on_results, (int)clusters_info.size())
80 : (int)clusters_info.size();
81
82 for (int idx = 0; idx < clusters_to_show; idx++) {
83 const auto& info = clusters_info[idx];
// Cluster numbering shown to the user is 1-based.
84 fm::write("Cluster #" + std::to_string(idx + 1)
85 + " (Files: " + std::to_string(info.paths.size())
86 + ", Pairs: " + std::to_string(info.total_pairs)
87 + ", Lines: " + std::to_string(info.total_lines)
88 + ", Score: " + std::to_string(info.score()) + ")");
90
// One entry per path: formatted path, an empty second column, and the line count.
91 std::vector<SimilarityExplorerEntry> entries{};
92 for (const auto& path : info.paths) {
93 entries.push_back({
94 path.format_path_message_in_pair(), "",
95 find_number_lines(path)
96 });
97 }
98
// Rows alternate between the "row_even" and "row_odd" styles of the active formatter.
99 fm::write(TEMPLATE_PROCESSED_RESULTS_CLUSTERS, entries, Format::AUTO, [](size_t i) {
100 return (i % 2 == 0)
101 ? fm::get_formatter()->style().at("row_even")
102 : fm::get_formatter()->style().at("row_odd");
103 });
104 fm::write("");
105 }
106}
107
// Prints the similar path pairs that pass the pattern filter.
// First writes an initial message with the number of pairs found and the
// number that will be shown, then writes a table with one entry per emitted pair.
// NOTE(review): this listing jumps from line 115 to line 117, so one statement
// after the initial message is not visible here; confirm against the original file.
108void SimilarityExplorer::explorer() {
109 std::vector<std::pair<Path, Path>> similar_path_pairs = build_similar_path_pairs();
110 int number_pair_found = find_number_pair_found(similar_path_pairs);
111 int number_pairs_show = find_number_pairs_show(number_pair_found);
112
113 fm::write(TEMPLATE_INITIAL_TEXT, SimilarityExplorerInitialMessage{
114 number_pair_found, number_pairs_show
115 }, Format::TEXT);
117
118 std::vector<SimilarityExplorerEntry> vector_entry = {};
119 for (const auto& [path1, path2] : similar_path_pairs) {
120 auto entry = process_similar_path_pair(path1, path2);
// Entries with a negative duplicated_lines are dropped; presumably this is how
// the empty entries returned for skipped pairs are recognized (the default
// value of duplicated_lines is not visible here - TODO confirm).
121 if (entry.duplicated_lines < 0) continue;
122 vector_entry.push_back(entry);
123 }
// Rows alternate between the "row_even" and "row_odd" styles of the active formatter.
124 fm::write(TEMPLATE_PROCESSED_RESULTS, vector_entry, Format::AUTO, [](size_t i) {
125 return (i % 2 == 0)
126 ? fm::get_formatter()->style().at("row_even")
127 : fm::get_formatter()->style().at("row_odd");
128 });
129}
130
131SimilarityExplorer::SimilarityExplorer(Similarity_Table* table)
132 : similarity_table(table),
133 limit_on_results(0),
134 pattern_to_match(""),
135 both_path_need_to_match_pattern(false),
136 sorted_by_number_of_duplicated_code(false),
137 use_clusters(false) {}
138
140 auto it_limiter = options.args.find("limiter");
141
142 if (it_limiter != options.args.end()) {
143 try {
144 limit_on_results = std::stoi(it_limiter->second);
145 } catch (const std::invalid_argument&) {
146 throw CLIError("--limiter must be a valid number (passing " + it_limiter->second + ")");
147 return false;
148 } catch (const std::out_of_range&) {
149 throw CLIError("--limiter outside the permitted range");
150 return false;
151 }
152 }
153
154 return true;
155}
156
158 auto it_pattern = options.args.find("pattern");
159 if (it_pattern != options.args.end()) {
160 pattern_to_match = it_pattern->second;
161 }
162 both_path_need_to_match_pattern = options.args.count("both-match") > 0;
163 sorted_by_number_of_duplicated_code = options.args.count("sort") > 0;
164 use_clusters = options.args.count("cluster") > 0;
165
166 if (use_clusters) {
167 explorer_clusters();
168 return true;
169 }
170 explorer();
171 return true;
172}
Base class for CLI-related errors.
Definition cli_error.hpp:10
const CliOption * options() const final
static void write(const std::string &template_str, const std::vector< T > &data, enum Format effective=Format::AUTO, RowColorFn color_fn=nullptr, std::ostream &out=std::cout)
static std::shared_ptr< IFormatter > get_formatter()
Represents a code function with its content and metadata.
Definition function.hpp:30
Path manipulation class for tool-specific directory structure.
Definition path.hpp:27
std::string format_path_message_in_pair() const
Formats path for display.
Definition path.cpp:145
bool contains_given_pattern(const std::string &pattern) const
Checks for pattern in path.
Definition path.cpp:140
static constexpr int UNLIMITED_RESULTS
Constant for unlimited results display.
bool validate(const ParsedOptions &options) override
Validate the arguments already analyzed.
bool run(const ParsedOptions &options) override
Handles code exploration command.
Represents a similarity graph between functions (paths).
std::vector< ClusterInfo > get_clusters_info(bool sorted)
Returns detailed information about all clusters found in the similarity table.
std::vector< std::pair< Path, Path > > get_all_similar_path_pairs_sorted_by_similarity()
Gets all similar path pairs, sorted by similarity.
std::vector< std::pair< Path, Path > > get_all_similar_path_pairs_sorted_by_line_number()
Gets all similar path pairs, sorted by line count.
const std::string LIMITER_PRINT
Constant string used as a visual delimiter/separator in prints.
Definition utils.hpp:33
Duplicate function exploration interface.