Arkanjo 0.2
A tool for finding duplicated functions in codebases
Loading...
Searching...
No Matches
parser.cpp
Go to the documentation of this file.
1#include "parser.hpp"
2
3Comparation::Comparation(string _path1, string _path2, double _sim) {
4 if (_path1 > _path2)
5 swap(_path1, _path2);
6 path1 = _path1;
7 path2 = _path2;
8 similarity = _sim;
9}
10
11bool Comparation::operator<(const Comparation& com) const {
12 if (similarity != com.similarity) {
13 return similarity > com.similarity;
14 }
15 if (path1 == com.path1) {
16 return path2 < com.path2;
17 }
18 return path1 < com.path1;
19}
20
21bool Comparation::operator==(const Comparation& com) const {
22 return path1 == com.path1 && path2 == com.path2;
23}
24
25vector<string> Parser::parser_line(const string& line) {
26 string at = "";
27 vector<string> ret;
28 for (auto c : line) {
29 if (c <= 20 || c == ' ') { // an space on non-printable char
30 if (!at.empty())
31 ret.push_back(at);
32 at = "";
33 } else {
34 at += c;
35 }
36 }
37 if (!at.empty())
38 ret.push_back(at);
39
40 return ret;
41}
42
43bool Parser::is_an_file(const std::string& s) {
44 return !s.empty() && s[0] == '/';
45}
46
/**
 * @brief Strips ANSI CSI escape sequences ("ESC [ ... final-byte") from a string.
 *
 * The string is compacted in place: kept characters are copied towards the
 * front and the string is truncated at the end. A sequence ends at the first
 * byte in the range '@'..'~'; an unterminated sequence swallows the rest of
 * the string. An ESC that is not followed by '[' is kept untouched.
 *
 * @param s String to clean; modified in place.
 */
void removeANSI_inplace(std::string& s) {
    const size_t len = s.size();
    size_t out = 0; // next position to write a kept character
    size_t in = 0;  // next position to read

    while (in < len) {
        const bool starts_csi = s[in] == '\033' && in + 1 < len && s[in + 1] == '[';

        if (!starts_csi) {
            s[out++] = s[in++];
            continue;
        }

        // Skip "ESC [" and then every parameter/intermediate byte.
        in += 2;
        while (in < len && !(s[in] >= '@' && s[in] <= '~'))
            ++in;

        // Drop the final byte that terminates the sequence, if there is one.
        if (in < len)
            ++in;
    }

    s.resize(out);
}
63
64double Parser::retrive_similarity(const std::string& s) {
65 try {
66 return std::stod(s);
67 } catch (...) {
68 return 0.0;
69 }
70}
71
72void Parser::parser_block_stream(const std::string& path, const std::vector<std::string>& tokens, set<Comparation>& comparations) {
73 if (tokens.size() < 2)
74 return;
75
76 const std::string& path_comp = tokens[0];
77
78 if (!is_an_file(path_comp))
79 return;
80
81 double similarity = retrive_similarity(tokens[1]);
82
83 if (similarity < similarity_cap)
84 return;
85
86 Comparation com(path, path_comp, similarity);
87 comparations.insert(com);
88}
89
90void Parser::exec_from_stream(FILE* pipe) {
91 std::string line;
92 char chunk[256];
93
94 std::string path;
95
96 while (fgets(chunk, sizeof(chunk), pipe)) {
97 line += chunk;
98
99 if (!line.empty() && line.back() == '\n') {
100 removeANSI_inplace(line);
101
102 auto tokens = parser_line(line);
103 if (tokens.size() > 2) {
104 for (auto token : tokens) {
105 if (is_an_file(token)) {
106 path = token;
107 break;
108 }
109 }
110 line.clear();
111 continue;
112 }
113
114 parser_block_stream(path, tokens, comparations);
115
116 line.clear();
117 }
118 }
119 fout << comparations.size() << '\n';
120 for (const auto& com : comparations) {
121 fout << com.path1 << ' ' << com.path2 << ' ';
122 fout << fixed << setprecision(2) << com.similarity << '\n';
123 }
124}
125
126Parser::Parser(const fs::path& output_file, double similarity_cap)
127 : fout(output_file), similarity_cap(similarity_cap) {
128
129 if (!fout) {
130 throw std::runtime_error("Failed to open output file");
131 }
132}
133
135 if (fout.is_open())
136 fout.close();
137}
void exec_from_stream(FILE *pipe)
Main parsing execution method using stream.
Definition parser.cpp:90
~Parser()
Definition parser.cpp:134
Parser(const fs::path &output_file, double similarity_cap)
Constructs parser with configuration.
Definition parser.cpp:126
void removeANSI_inplace(std::string &s)
Definition parser.cpp:47
Code duplication results parser.
Structure representing a code comparison result.
Definition parser.hpp:31
bool operator<(const Comparation &com) const
Comparison operator for sorting.
Definition parser.cpp:11
double similarity
Similarity score between the files (0-100)
Definition parser.hpp:34
Comparation()
Default constructor.
Definition parser.hpp:39
string path1
Path to first code file being compared.
Definition parser.hpp:32
bool operator==(const Comparation &com) const
Equality comparison operator.
Definition parser.cpp:21
string path2
Path to second code file being compared.
Definition parser.hpp:33