Arkanjo 0.1
A tool for finding duplicated functions in codebases
Loading...
Searching...
No Matches
function_breaker_c.cpp
Go to the documentation of this file.
// TODO - extracting the declaration line does not work well for locating the definition,
// because comments or #define directives may appear in the middle of the definition
3
5
6bool FunctionBreakerC::is_define(int line, int pos){
7 int line_size = file_content.size();
8 // does not fit the #define token
9 if(pos+7 > line_size) return false;
10 // match the token
11 string token = "#define";
12 bool match = true;
13 for(int j = 0; j < 7; j++){
14 match &= file_content[line][pos+j] == token[j];
15 }
16 return match;
17}
18
19// Only works if the code is compilable. I do have grant any
20// ensurances if the source code does not compile
21void FunctionBreakerC::filter_mask_commentaries_and_defines(vector<vector<bool>>& mask){
22 // aqui tbm tem que lidar com string literal, ie, "#define" nao eh define a
23 // eh "//" nao eh commentario
24 int number_lines = file_content.size();
25 bool is_open_block_comment = false;
26 bool is_open_define = false;
27 bool is_open_quotation_marks = false;
28 bool is_open_line_comment = false;
29
30 for(int i = 0; i < number_lines; i++){
31 auto& line = file_content[i];
32 auto& mask_line = mask[i];
33 int line_size = line.size();
34
35 if(is_open_define){
36 for(int j = 0; j < line_size; j++){
37 mask_line[j] = false;
38 }
39 // if the last token is to continue the define
40 if(line.back() != '\\'){
41 is_open_define = false;
42 }
43 continue;
44 }
45
46 if(is_open_line_comment){
47 for(int j = 0; j < line_size; j++){
48 mask_line[j] = false;
49 }
50 // if the last token is to continue the define
51 if(line.back() != '\\'){
52 is_open_line_comment = false;
53 }
54 continue;
55 }
56
57 for(int j = 0; j < line_size; j++){
58 if(is_open_block_comment){
59 mask_line[j] = false;
60 // if the block line comes to an end
61 if(j+1 < line_size && line[j] == '*' && line[j+1] == '/'){
62 j++;
63 mask_line[j] = false;
64 is_open_block_comment = false;
65 }
66 continue;
67 }
68
69 if(is_open_quotation_marks){
70 mask_line[j] = false;
71 // TODO should I take a look on ""s ?
72 if(line[j] == '"'){
73 is_open_quotation_marks = false;
74 }else if(line[j] == '\\'){
75 if(j == line_size-1){
76 break;
77 }else{
78 j++;
79 mask_line[j] = false;
80 }
81 }
82 continue;
83 }
84
85 if(line[j] == '\''){
86 assert(j+1 < line_size &&
87 "source code does not compile, ' open but not closed");
88 mask_line[j] = false;
89 j++;
90 if(line[j] == '\\'){
91 assert(j+2 < line_size && line[j+2] == '\'' &&
92 "source code does not compile, ' open but not closed");
93 mask_line[j] = false;
94 j++;
95 }else{
96 assert(j+1 < line_size && line[j+1] == '\'' &&
97 "source code does not compile, ' open but not closed");
98 }
99 mask_line[j] = false;
100 j++;
101 mask_line[j] = false;
102 continue;
103 }
104
105 if(line[j] == '"'){
106 is_open_quotation_marks = true;
107 mask_line[j] = false;
108 continue;
109 }
110
111 if(line[j] == '/'){
112 if(j == line_size-1){
113 continue;
114 }
115
116 if(line[j+1] == '/'){
117 for(int k = j; k < line_size; k++){
118 mask_line[k] = false;
119 }
120 // find line comment, everything after is comment and
121 // break the iteration on the current line
122 is_open_line_comment = line.back() == '\\';
123 break;
124 }
125
126 if(line[j+1] == '*'){
127 mask_line[j] = false;
128 j++;
129 mask_line[j] = false;
130 is_open_block_comment = true;
131 continue;
132 }
133 }
134
135 if(is_define(i,j)){
136 for(int k = j; k < line_size; k++){
137 mask_line[k] = false;
138 }
139 // find #define, everything after is comment and
140 // break the iteration on the current line
141 is_open_define = line.back() == '\\';
142 break;
143 }
144 }
145 }
146
147 assert(is_open_block_comment == false &&
148 "source code does not compile, open block comment");
149 assert(is_open_quotation_marks == false &&
150 "source code does not compile, open quotation marks");
151}
152
153// the exactly same size of the input source, the character will be 1 if it is not in a commentary nor a #define's
154vector<vector<bool>> FunctionBreakerC::build_mask_valid_code(){
155 vector<vector<bool>> mask(file_content.size());
156 for(int i = 0; i < (int)file_content.size(); i++){
157 mask[i] = vector<bool>(file_content[i].size(),true);
158 }
159 filter_mask_commentaries_and_defines(mask);
160 return mask;
161}
162
163set<array<int,5>> FunctionBreakerC::find_start_end_and_depth_of_brackets(){
164 set<array<int,5>> start_ends;
165 int open_brackets = 0;
166
167 vector<pair<int,int>> not_processed_open_brackets;
168 auto process_open = [&](int line_number, int column){
169 open_brackets++;
170 not_processed_open_brackets.push_back({line_number,column});
171 };
172 auto process_close = [&](int line_number, int column){
173 open_brackets--;
174 if(open_brackets <= -1){
175 open_brackets = 0;
176 }else{
177 auto [matched_line,matched_column]= not_processed_open_brackets.back();
178 not_processed_open_brackets.pop_back();
179 int depth_of_open = not_processed_open_brackets.size();
180 start_ends.insert({matched_line,
181 matched_column,
182 line_number,
183 column,
184 depth_of_open});
185 }
186 };
187
188 for(size_t i = 0; i < file_content.size(); i++){
189 auto& line = file_content[i];
190 for(size_t j = 0; j < line.size(); j++){
191 if(!mask_valid[i][j]){
192 continue;
193 }
194 auto c = line[j];
195 if(c == '{'){
196 process_open(i,j);
197 }
198 if(c == '}'){
199 process_close(i,j);
200 }
201 }
202 }
203 return start_ends;
204}
205
206set<array<int,4>> FunctionBreakerC::find_start_end_of_brackets_of_given_depth(){
207 set<array<int,4>> ret;
208 set<array<int,5>> bracket_pairs = find_start_end_and_depth_of_brackets();
209 for(auto [start_line,start_column,end_line,end_column,dep] : bracket_pairs){
210 if(dep == C_RELEVANT_DEPTH){
211 ret.insert({start_line,start_column,end_line,end_column});
212 }
213 }
214 return ret;
215}
216
217vector<string> FunctionBreakerC::build_function_content(int start_number_line, int start_column, int end_number_line, int end_column){
218 vector<string> function_content;
219
220 if(start_number_line == end_number_line){
221 string line = "";
222 for(int j = start_column; j <= end_column; j++){
223 line += file_content[start_number_line][j];
224 }
225 function_content.push_back(line);
226 return function_content;
227 }
228
229 string first_line = file_content[start_number_line];
230 int first_line_size = first_line.size();
231 string first_line_contribution = "";
232 for(int j = start_column; j < first_line_size; j++){
233 first_line_contribution += first_line[j];
234 }
235 function_content.push_back(first_line_contribution);
236
237 for(int i = start_number_line+1; i < end_number_line; i++){
238 function_content.push_back(file_content[i]);
239 }
240
241 string last_line = file_content[end_number_line];
242 int last_line_size = last_line.size();
243 string last_line_contribution = "";
244 for(int j = 0; j <= end_column; j++){
245 last_line_contribution += last_line[j];
246 }
247 function_content.push_back(last_line_contribution);
248
249 return function_content;
250}
251
252
253bool FunctionBreakerC::move_pointer_until_character_outside_parenteses(int &line, int &column){
254 int quantity_open = 0;
255 bool has_parenteses = false;
256 while(line != 0 || column != -1){
257 if(column == -1){
258 line -= 1;
259 column = file_content[line].size();
260 column -= 1;
261 continue;
262 }
263
264 auto c = file_content[line][column];
265 if(!mask_valid[line][column]){
266 column--;
267 continue;
268 }
269 if(c == ')'){
270 quantity_open++;
271 has_parenteses = true;
272 column--;
273 continue;
274 }
275 if(c == '('){
276 quantity_open--;
277 has_parenteses = true;
278 column--;
279 continue;
280 }
281 if(Utils::is_special_char(c) || quantity_open != 0){
282 column--;
283 continue;
284 }
285 break;
286 }
287 assert( !(line == 0 && column == -1) && "code does not compile, bad formation of parenteses ()");
288 return has_parenteses;
289}
290
291//extract function_name, declaration start line and header content
292tuple<string,int,vector<string>> FunctionBreakerC::extract_header_related_information(int start_line, int start_column){
293 int line = start_line;
294 int column = start_column-1;
295
296 bool has_parenteses = move_pointer_until_character_outside_parenteses(line,column);
297
298 string file_name = "";
299 while(column != -1){
300 auto c = file_content[line][column];
302 break;
303 }
304 file_name += c;
305 column--;
306 }
307 reverse(file_name.begin(),file_name.end());
308
309 move_pointer_until_character_outside_parenteses(line,column);
310
311 while(column != -1 || !Utils::is_special_char(file_content[line][column])){
312 column--;
313 }
314 column++;
315
316 vector<string> header_content;
317 if(start_column == 0){
318 header_content = build_function_content(line,column,start_line-1,(int)file_content[start_line-1].size() -1);
319 }else{
320 header_content = build_function_content(line,column,start_line,start_column-1);
321 }
322
323 if(!ALLOW_STRUCTS && !has_parenteses){
324 return {"",-1,header_content};
325 }
326 return {file_name,line,header_content};
327}
328
329bool FunctionBreakerC::is_body_function_empty(int start_number_line, int start_column, int end_number_line, int end_column){
330 vector<string> function_content = build_function_content(start_number_line, start_column, end_number_line, end_column);
331 int count_not_empty_char = 0;
332 for(auto line : function_content){
333 for(auto c : line){
334 if(!Utils::is_empty_char(c)){
335 count_not_empty_char++;
336 }
337 }
338 }
339 bool is_empty = count_not_empty_char <= 2;
340 return is_empty;
341}
342
343void FunctionBreakerC::process_function(int start_number_line,
344 int start_column,
345 int end_number_line,
346 int end_column,
347 string relative_path){
348 string first_line = file_content[start_number_line];
349 auto [function_name, line_declaration, header_content] = extract_header_related_information(start_number_line,start_column);
350 if(function_name.empty()){
351 return;
352 }
353 if(IGNORE_EMPTY_FUNCTIONS){
354 if(is_body_function_empty(start_number_line,start_column,end_number_line,end_column)){
355 return;
356 }
357 }
358 vector<string> function_content = build_function_content(start_number_line,start_column,end_number_line,end_column);
359
360 create_source_file(start_number_line,end_number_line,relative_path,function_name,function_content);
361 create_header_file(relative_path, function_name, header_content);
362 create_info_file(line_declaration,start_number_line,end_number_line,relative_path,function_name);
363}
364
365string FunctionBreakerC::file_path_from_folder_path(string file_path, string folder_path){
366 string ret = "";
367 for(size_t i = folder_path.size(); i < file_path.size(); i++){
368 ret += file_path[i];
369 }
370 return ret;
371}
372
373void FunctionBreakerC::file_breaker_c(string file_path, string folder_path){
374 string relative_path = file_path_from_folder_path(file_path, folder_path);
375 file_content = Utils::read_file_generic(file_path);
376 mask_valid = build_mask_valid_code();
377
378 set<array<int,4>> start_end_of_functions = find_start_end_of_brackets_of_given_depth();
379 for(auto [start_line,start_column,end_line,end_column] : start_end_of_functions){
380 process_function(start_line,start_column,end_line,end_column,relative_path);
381 }
382}
383
384FunctionBreakerC::FunctionBreakerC(string file_path, string folder_path){
385 file_breaker_c(file_path, folder_path);
386}
FunctionBreakerC(string file_path, string folder_path)
Constructs function breaker and processes file.
return ret
Definition sum.c:3
C/C++ function parsing and extraction.
void create_info_file(int line_declaration, int start_number_line, int end_number_line, string relative_path, string function_name)
Creates JSON metadata file for a function.
void create_header_file(string relative_path, string function_name, const vector< string > &header_content)
Creates header file for a function.
void create_source_file(int start_number_line, int end_number_line, string relative_path, string function_name, const vector< string > &function_content)
Creates source file for a function.
bool is_special_char(char c)
Checks if a character is special (non-alphanumeric and not underscore)
Definition utils.cpp:103
bool is_empty_char(char c)
Checks if a character is considered empty/whitespace.
Definition utils.cpp:93
vector< string > read_file_generic(string string_path)
Reads a file line by line into a vector of strings.
Definition utils.cpp:19