16int SimilarityExplorer::find_number_pairs_show(
int number_pair_found)
const {
18 return number_pair_found;
20 return std::min(limit_on_results, number_pair_found);
23bool SimilarityExplorer::match_pattern(
const Path& path1,
const Path& path2)
const {
27 if (both_path_need_to_match_pattern) {
28 return match1 && match2;
30 return match1 || match2;
33int SimilarityExplorer::find_number_lines(
const Path& path1) {
36 return function.number_of_lines();
40 if (!match_pattern(path1, path2)) {
43 if (limit_on_results !=
UNLIMITED_RESULTS && processed_results >= limit_on_results) {
51 find_number_lines(path1)
55int SimilarityExplorer::find_number_pair_found(
const std::vector<std::pair<Path, Path>>& similar_path_pairs)
const {
57 for (
const auto& [path1, path2] : similar_path_pairs) {
58 if (match_pattern(path1, path2)) {
65std::vector<std::pair<Path, Path>> SimilarityExplorer::build_similar_path_pairs() {
66 std::vector<std::pair<Path, Path>> similar_path_pairs;
67 if (sorted_by_number_of_duplicated_code) {
72 return similar_path_pairs;
75void SimilarityExplorer::explorer_clusters() {
76 auto clusters_info = similarity_table->
get_clusters_info(sorted_by_number_of_duplicated_code);
78 int clusters_to_show = limit_on_results > 0
79 ? std::min(limit_on_results, (
int)clusters_info.size())
80 : (int)clusters_info.size();
82 for (
int idx = 0; idx < clusters_to_show; idx++) {
83 const auto& info = clusters_info[idx];
84 fm::write(
"Cluster #" + std::to_string(idx + 1)
85 +
" (Files: " + std::to_string(info.paths.size())
86 +
", Pairs: " + std::to_string(info.total_pairs)
87 +
", Lines: " + std::to_string(info.total_lines)
88 +
", Score: " + std::to_string(info.score()) +
")");
91 std::vector<SimilarityExplorerEntry> entries{};
92 for (
const auto& path : info.paths) {
94 path.format_path_message_in_pair(),
"",
95 find_number_lines(path)
108void SimilarityExplorer::explorer() {
109 std::vector<std::pair<Path, Path>> similar_path_pairs = build_similar_path_pairs();
110 int number_pair_found = find_number_pair_found(similar_path_pairs);
111 int number_pairs_show = find_number_pairs_show(number_pair_found);
114 number_pair_found, number_pairs_show
118 std::vector<SimilarityExplorerEntry> vector_entry = {};
119 for (
const auto& [path1, path2] : similar_path_pairs) {
120 auto entry = process_similar_path_pair(path1, path2);
121 if (entry.duplicated_lines < 0)
continue;
122 vector_entry.push_back(entry);
132 : similarity_table(table),
134 pattern_to_match(
""),
135 both_path_need_to_match_pattern(false),
136 sorted_by_number_of_duplicated_code(false),
137 use_clusters(false) {}
140 auto it_limiter =
options.args.find(
"limiter");
142 if (it_limiter !=
options.args.end()) {
144 limit_on_results = std::stoi(it_limiter->second);
145 }
catch (
const std::invalid_argument&) {
146 throw CLIError(
"--limiter must be a valid number (passing " + it_limiter->second +
")");
148 }
catch (
const std::out_of_range&) {
149 throw CLIError(
"--limiter outside the permitted range");
158 auto it_pattern =
options.args.find(
"pattern");
159 if (it_pattern !=
options.args.end()) {
160 pattern_to_match = it_pattern->second;
162 both_path_need_to_match_pattern =
options.args.count(
"both-match") > 0;
163 sorted_by_number_of_duplicated_code =
options.args.count(
"sort") > 0;
164 use_clusters =
options.args.count(
"cluster") > 0;
Base class for CLI-related errors.
const CliOption * options() const final
Represents a code function with its content and metadata.
Path manipulation class for tool-specific directory structure.
std::string format_path_message_in_pair() const
Formats path for display.
bool contains_given_pattern(const std::string &pattern) const
Checks for pattern in path.
static constexpr int UNLIMITED_RESULTS
Constant indicating that the number of results displayed is unlimited.
bool validate(const ParsedOptions &options) override
Validates the already-parsed arguments.
bool run(const ParsedOptions &options) override
Handles the code exploration command.
Represents a similarity graph between functions (paths).
std::vector< ClusterInfo > get_clusters_info(bool sorted)
Returns detailed information about all clusters found in the similarity table.
std::vector< std::pair< Path, Path > > get_all_similar_path_pairs_sorted_by_similarity()
Gets all similar path pairs, sorted by similarity.
std::vector< std::pair< Path, Path > > get_all_similar_path_pairs_sorted_by_line_number()
Gets all similar path pairs, sorted by line count.
const std::string LIMITER_PRINT
Constant string used as a visual delimiter/separator in printed output.
Duplicate function exploration interface.