Arkanjo 0.2
A tool for finding duplicated functions in codebases
Loading...
Searching...
No Matches
function_breaker_c.cpp
Go to the documentation of this file.
1// TODO - line declaration extraction does not work really well to extract definition
2// because of comments or define in the middle of definition
3
5
6bool FunctionBreakerC::is_define(size_t line, size_t pos) {
7 size_t line_size = file_content.size();
8 // does not fit the #define token
9 if (pos + 7 > line_size)
10 return false;
11 // match the token
12 string token = "#define";
13 bool match = true;
14 for (size_t j = 0; j < 7; j++) {
15 match &= file_content[line][pos + j] == token[j];
16 }
17 return match;
18}
19
20// Only works if the code is compilable. I do have grant any
21// ensurances if the source code does not compile
22void FunctionBreakerC::filter_mask_commentaries_and_defines(vector<vector<bool>>& mask) {
23 // aqui tbm tem que lidar com string literal, ie, "#define" nao eh define a
24 // eh "//" nao eh commentario
25 size_t number_lines = file_content.size();
26 bool is_open_block_comment = false;
27 bool is_open_define = false;
28 bool is_open_quotation_marks = false;
29 bool is_open_line_comment = false;
30
31 for (size_t i = 0; i < number_lines; i++) {
32 auto& line = file_content[i];
33 auto& mask_line = mask[i];
34 size_t line_size = line.size();
35
36 if (is_open_define) {
37 for (size_t j = 0; j < line_size; j++) {
38 mask_line[j] = false;
39 }
40 // if the last token is to continue the define
41 if (line.back() != '\\') {
42 is_open_define = false;
43 }
44 continue;
45 }
46
47 if (is_open_line_comment) {
48 for (size_t j = 0; j < line_size; j++) {
49 mask_line[j] = false;
50 }
51 // if the last token is to continue the define
52 if (line.back() != '\\') {
53 is_open_line_comment = false;
54 }
55 continue;
56 }
57
58 for (size_t j = 0; j < line_size; j++) {
59 if (is_open_block_comment) {
60 mask_line[j] = false;
61 // if the block line comes to an end
62 if (j + 1 < line_size && line[j] == '*' && line[j + 1] == '/') {
63 j++;
64 mask_line[j] = false;
65 is_open_block_comment = false;
66 }
67 continue;
68 }
69
70 if (is_open_quotation_marks) {
71 mask_line[j] = false;
72 // TODO should I take a look on ""s ?
73 if (line[j] == '"') {
74 is_open_quotation_marks = false;
75 } else if (line[j] == '\\') {
76 if (j == line_size - 1) {
77 break;
78 } else {
79 j++;
80 mask_line[j] = false;
81 }
82 }
83 continue;
84 }
85
86 if (line[j] == '\'') {
87 assert(j + 1 < line_size &&
88 "source code does not compile, ' open but not closed");
89 mask_line[j] = false;
90 j++;
91 if (line[j] == '\\') {
92 assert(j + 2 < line_size && line[j + 2] == '\'' &&
93 "source code does not compile, ' open but not closed");
94 mask_line[j] = false;
95 j++;
96 } else {
97 assert(j + 1 < line_size && line[j + 1] == '\'' &&
98 "source code does not compile, ' open but not closed");
99 }
100 mask_line[j] = false;
101 j++;
102 mask_line[j] = false;
103 continue;
104 }
105
106 if (line[j] == '"') {
107 is_open_quotation_marks = true;
108 mask_line[j] = false;
109 continue;
110 }
111
112 if (line[j] == '/') {
113 if (j == line_size - 1) {
114 continue;
115 }
116
117 if (line[j + 1] == '/') {
118 for (size_t k = j; k < line_size; k++) {
119 mask_line[k] = false;
120 }
121 // find line comment, everything after is comment and
122 // break the iteration on the current line
123 is_open_line_comment = line.back() == '\\';
124 break;
125 }
126
127 if (line[j + 1] == '*') {
128 mask_line[j] = false;
129 j++;
130 mask_line[j] = false;
131 is_open_block_comment = true;
132 continue;
133 }
134 }
135
136 if (is_define(i, j)) {
137 for (size_t k = j; k < line_size; k++) {
138 mask_line[k] = false;
139 }
140 // find #define, everything after is comment and
141 // break the iteration on the current line
142 is_open_define = line.back() == '\\';
143 break;
144 }
145 }
146 }
147
148 assert(is_open_block_comment == false &&
149 "source code does not compile, open block comment");
150 assert(is_open_quotation_marks == false &&
151 "source code does not compile, open quotation marks");
152}
153
154// the exactly same size of the input source, the character will be 1 if it is not in a commentary nor a #define's
155vector<vector<bool>> FunctionBreakerC::build_mask_valid_code() {
156 vector<vector<bool>> mask(file_content.size());
157 for (size_t i = 0; i < file_content.size(); i++) {
158 mask[i] = vector<bool>(file_content[i].size(), true);
159 }
160 filter_mask_commentaries_and_defines(mask);
161 return mask;
162}
163
164set<array<int, 5>> FunctionBreakerC::find_start_end_and_depth_of_brackets() {
165 set<array<int, 5>> start_ends;
166 int open_brackets = 0;
167
168 vector<pair<int, int>> not_processed_open_brackets;
169 auto process_open = [&](int line_number, int column) {
170 open_brackets++;
171 not_processed_open_brackets.push_back({line_number, column});
172 };
173 auto process_close = [&](int line_number, int column) {
174 open_brackets--;
175 if (open_brackets <= -1) {
176 open_brackets = 0;
177 } else {
178 auto [matched_line, matched_column] = not_processed_open_brackets.back();
179 not_processed_open_brackets.pop_back();
180 int depth_of_open = not_processed_open_brackets.size();
181 start_ends.insert({matched_line,
182 matched_column,
183 line_number,
184 column,
185 depth_of_open});
186 }
187 };
188
189 for (size_t i = 0; i < file_content.size(); i++) {
190 auto& line = file_content[i];
191 for (size_t j = 0; j < line.size(); j++) {
192 if (!mask_valid[i][j]) {
193 continue;
194 }
195 auto c = line[j];
196 if (c == '{') {
197 process_open(i, j);
198 }
199 if (c == '}') {
200 process_close(i, j);
201 }
202 }
203 }
204 return start_ends;
205}
206
207set<array<int, 4>> FunctionBreakerC::find_start_end_of_brackets_of_given_depth() {
208 set<array<int, 4>> ret;
209 set<array<int, 5>> bracket_pairs = find_start_end_and_depth_of_brackets();
210 for (auto [start_line, start_column, end_line, end_column, dep] : bracket_pairs) {
211 if (dep == C_RELEVANT_DEPTH) {
212 ret.insert({start_line, start_column, end_line, end_column});
213 }
214 }
215 return ret;
216}
217
218vector<string> FunctionBreakerC::build_function_content(int start_number_line, int start_column, int end_number_line, int end_column) {
219 vector<string> function_content;
220
221 if (start_number_line == end_number_line) {
222 string line = "";
223 for (int j = start_column; j <= end_column; j++) {
224 line += file_content[start_number_line][j];
225 }
226 function_content.push_back(line);
227 return function_content;
228 }
229
230 string first_line = file_content[start_number_line];
231 int first_line_size = first_line.size();
232 string first_line_contribution = "";
233 for (int j = start_column; j < first_line_size; j++) {
234 first_line_contribution += first_line[j];
235 }
236 function_content.push_back(first_line_contribution);
237
238 for (int i = start_number_line + 1; i < end_number_line; i++) {
239 function_content.push_back(file_content[i]);
240 }
241
242 string last_line = file_content[end_number_line];
243 string last_line_contribution = "";
244 for (int j = 0; j <= end_column; j++) {
245 last_line_contribution += last_line[j];
246 }
247 function_content.push_back(last_line_contribution);
248
249 return function_content;
250}
251
252bool FunctionBreakerC::move_pointer_until_character_outside_parenteses(int& line, int& column) {
253 int quantity_open = 0;
254 bool has_parenteses = false;
255 while (line != 0 || column != -1) {
256 if (column == -1) {
257 line -= 1;
258 column = file_content[line].size();
259 column -= 1;
260 continue;
261 }
262
263 auto c = file_content[line][column];
264 if (!mask_valid[line][column]) {
265 column--;
266 continue;
267 }
268 if (c == ')') {
269 quantity_open++;
270 has_parenteses = true;
271 column--;
272 continue;
273 }
274 if (c == '(') {
275 quantity_open--;
276 has_parenteses = true;
277 column--;
278 continue;
279 }
280 if (Utils::is_special_char(c) || quantity_open != 0) {
281 column--;
282 continue;
283 }
284 break;
285 }
286 assert(!(line == 0 && column == -1) && "code does not compile, bad formation of parenteses ()");
287 return has_parenteses;
288}
289
290// extract function_name, declaration start line and header content
291tuple<string, int, vector<string>> FunctionBreakerC::extract_header_related_information(int start_line, int start_column) {
292 int line = start_line;
293 int column = start_column - 1;
294
295 bool has_parenteses = move_pointer_until_character_outside_parenteses(line, column);
296
297 string file_name = "";
298 while (column >= 0 && column < (int)file_content[line].size()) {
299 char c = file_content[line][column];
300 if (Utils::is_special_char(c)) {
301 break;
302 }
303 file_name += c;
304 column--;
305 }
306 reverse(file_name.begin(), file_name.end());
307
308 move_pointer_until_character_outside_parenteses(line, column);
309
310 while (column >= 0 && column < (int)file_content[line].size() && !Utils::is_special_char(file_content[line][column])) {
311 column--;
312 }
313 column++;
314
315 vector<string> header_content;
316 if (start_column == 0) {
317 header_content = build_function_content(line, column, start_line - 1, (int)file_content[start_line - 1].size() - 1);
318 } else {
319 header_content = build_function_content(line, column, start_line, start_column - 1);
320 }
321
322 if (!ALLOW_STRUCTS && !has_parenteses) {
323 return {"", -1, header_content};
324 }
325 return {file_name, line, header_content};
326}
327
328bool FunctionBreakerC::is_body_function_empty(int start_number_line, int start_column, int end_number_line, int end_column) {
329 vector<string> function_content = build_function_content(start_number_line, start_column, end_number_line, end_column);
330 int count_not_empty_char = 0;
331 for (auto line : function_content) {
332 for (auto c : line) {
333 if (!Utils::is_empty_char(c)) {
334 count_not_empty_char++;
335 }
336 }
337 }
338 bool is_empty = count_not_empty_char <= 2;
339 return is_empty;
340}
341
342void FunctionBreakerC::process_function(int start_number_line,
343 int start_column,
344 int end_number_line,
345 int end_column,
346 const fs::path& relative_path) {
347 string first_line = file_content[start_number_line];
348 auto [function_name, line_declaration, header_content] = extract_header_related_information(start_number_line, start_column);
349 if (function_name.empty()) {
350 return;
351 }
352 if (IGNORE_EMPTY_FUNCTIONS) {
353 if (is_body_function_empty(start_number_line, start_column, end_number_line, end_column)) {
354 return;
355 }
356 }
357 vector<string> function_content = build_function_content(start_number_line, start_column, end_number_line, end_column);
358
359 create_source_file(start_number_line, end_number_line, relative_path, function_name, function_content);
360 create_header_file(relative_path, function_name, header_content);
361 create_info_file(line_declaration, start_number_line, end_number_line, relative_path, function_name);
362}
363
364fs::path FunctionBreakerC::file_path_from_folder_path(const fs::path& file_path, const fs::path& folder_path) {
365 return fs::relative(file_path, folder_path);
366}
367
368void FunctionBreakerC::file_breaker_c(const fs::path& file_path, const fs::path& folder_path) {
369 const fs::path& relative_path = file_path_from_folder_path(file_path, folder_path);
370 file_content = Utils::read_file_generic(file_path);
371 mask_valid = build_mask_valid_code();
372
373 set<array<int, 4>> start_end_of_functions = find_start_end_of_brackets_of_given_depth();
374 for (auto [start_line, start_column, end_line, end_column] : start_end_of_functions) {
375 process_function(start_line, start_column, end_line, end_column, relative_path);
376 }
377}
378
379FunctionBreakerC::FunctionBreakerC(const fs::path& file_path, const fs::path& folder_path) {
380 file_breaker_c(file_path, folder_path);
381}
FunctionBreakerC(const fs::path &file_path, const fs::path &folder_path)
Constructs function breaker and processes file.
C/C++ function parsing and extraction.
void create_source_file(int start_number_line, int end_number_line, const fs::path &relative_path, const string &function_name, const vector< string > &function_content)
Creates source file for a function.
void create_info_file(int line_declaration, int start_number_line, int end_number_line, const fs::path &relative_path, const string &function_name)
Creates JSON metadata file for a function.
void create_header_file(const fs::path &relative_path, const string &function_name, const vector< string > &header_content)
Creates header file for a function.
bool is_special_char(char c)
Checks if a character is special (non-alphanumeric and not underscore)
Definition utils.cpp:66
bool is_empty_char(char c)
Checks if a character is considered empty/whitespace.
Definition utils.cpp:56
std::vector< std::string > read_file_generic(const fs::path &string_path)
Reads a file line by line into a vector of strings.
Definition utils.cpp:13