Arkanjo 0.1
A tool for finding duplicated functions in codebases
Loading...
Searching...
No Matches
function_breaker_java.cpp
Go to the documentation of this file.
2
3set<array<int,3>> FunctionBreakerJava::find_start_end_and_depth_of_brackets(vector<string> brackets_content){
4 set<array<int,3>> start_ends;
5 int open_brackets = 0;
6
7 vector<int> not_processed_open_brackets;
8 auto process_open = [&](int line_number){
9 open_brackets++;
10 not_processed_open_brackets.push_back(line_number);
11 };
12 auto process_close = [&](int line_number){
13 open_brackets--;
14 if(open_brackets <= -1){
15 open_brackets = 0;
16 }else{
17 int matched_open_position = not_processed_open_brackets.back();
18 not_processed_open_brackets.pop_back();
19 int depth_of_open = not_processed_open_brackets.size();
20 start_ends.insert({matched_open_position,line_number,depth_of_open});
21 }
22 };
23
24 for(size_t i = 0; i < brackets_content.size(); i++){
25 auto line = brackets_content[i];
26 for(auto c : line){
27 if(c == '{'){
28 process_open(i);
29 }
30 if(c == '}'){
31 process_close(i);
32 }
33 }
34 }
35 return start_ends;
36}
37
38set<pair<int,int>> FunctionBreakerJava::find_start_end_of_brackets_of_given_depth(vector<string> brackets_content, int depth){
39 set<pair<int,int>> ret;
40 set<array<int,3>> bracket_pairs = find_start_end_and_depth_of_brackets(brackets_content);
41 for(auto [start,end,dep] : bracket_pairs){
42 if(dep == depth){
43 ret.insert({start,end});
44 }
45 }
46 return ret;
47}
48
49int FunctionBreakerJava::find_position_first_open_bracket(string s){
50 for(size_t i = 0; i < s.size(); i++){
51 char c = s[i];
52 if(c == '{'){
53 return i;
54 }
55 }
56 return -1;
57}
58
59string FunctionBreakerJava::extract_last_token_of_string(string s){
60 vector<string> tokens;
61 string cur_token = "";
62 for(size_t i = 0; i < s.size(); i++){
63 char c = s[i];
65 if(!cur_token.empty()){
66 tokens.push_back(cur_token);
67 }
68 cur_token = "";
69 }else{
70 cur_token += c;
71 }
72 }
73 if(!cur_token.empty()){
74 tokens.push_back(cur_token);
75 }
76
77 if(tokens.empty()){
78 return "";
79 }
80 return tokens.back();
81}
82
83Line_content FunctionBreakerJava::build_line_code(int line_number, string content){
85 ret.line_number = line_number;
86 ret.content = content;
87 return ret;
88}
89
90vector<Line_content> FunctionBreakerJava::get_lines_before_body_function(const vector<string> &file_content, int line_start_body_function, int pos_bracket){
91 vector<Line_content> ret;
92 Line_content line_bracket = build_line_code(line_start_body_function,file_content[line_start_body_function]);
93 //remove everything after {
94 while(int(line_bracket.content.size()) > pos_bracket){
95 line_bracket.content.pop_back();
96 }
97 ret.push_back(line_bracket);
98
99 int until = max(0,line_start_body_function-NUMBER_OF_LINES_BEFORE_FOR_FUNCTION_NAME);
100 for(int i = line_start_body_function-1; i >= until; i--){
101 ret.push_back(build_line_code(i,file_content[i]));
102 }
103 reverse(ret.begin(),ret.end());
104
105 //remove empty lines/empty characters at the end of lines
106 while(!ret.empty()){
107 if(ret.back().content.empty()){
108 ret.pop_back();
109 continue;
110 }
111 if(Utils::is_empty_char(ret.back().content.back())){
112 ret.back().content.pop_back();
113 continue;
114 }
115 break;
116 }
117 return ret;
118}
119
120
121vector<Line_content> FunctionBreakerJava::remove_parenteses_at_the_end_of_the_scope(vector<Line_content> code){
122 if(code.empty() || code.back().content.back() != ')'){
123 return code;
124 }
125 int count_close_parenteses = 0;
126
127 while(!code.empty()){
128 string content = code.back().content;
129 while(!content.empty()){
130 if(content.back() == ')'){
131 count_close_parenteses++;
132 }
133 if(content.back() == '('){
134 count_close_parenteses--;
135 }
136 content.pop_back();
137 if(count_close_parenteses == 0){
138 break;
139 }
140 }
141 code.back().content = content;
142 if(count_close_parenteses == 0){
143 break;
144 }
145 code.pop_back();
146 }
147 return code;
148}
149
150vector<Line_content> FunctionBreakerJava::remove_content_until_find_parenteses_at_the_end(vector<Line_content> code){
151 while(!code.empty()){
152 string content = code.back().content;
153 while(!content.empty()){
154 if(content.back() == ')'){
155 break;
156 }else{
157 content.pop_back();
158 }
159 }
160 code.back().content = content;
161 if(!content.empty()){
162 break;
163 }
164 code.pop_back();
165 }
166 return code;
167}
168
169vector<Line_content> FunctionBreakerJava::remove_parameters_of_declaration_c(vector<Line_content> code){
170 if(!ALLOW_STRUCTS){
171 auto ret = remove_content_until_find_parenteses_at_the_end(code);
172 return remove_parenteses_at_the_end_of_the_scope(ret);
173 }
174 return remove_parenteses_at_the_end_of_the_scope(code);
175}
176
177vector<Line_content> FunctionBreakerJava::remove_parameters_of_declaration_java(vector<Line_content> code){
178 auto ret = remove_content_until_find_parenteses_at_the_end(code);
179 return remove_parenteses_at_the_end_of_the_scope(ret);
180}
181
182vector<Line_content> FunctionBreakerJava::remove_parameters_of_declaration(vector<Line_content> code, PROGRAMMING_LANGUAGE programming_language){
183 if(programming_language == C){
184 return remove_parameters_of_declaration_c(code);
185 }
186 if(programming_language == JAVA){
187 return remove_parameters_of_declaration_java(code);
188 }
189 return code;
190}
191
192pair<string,int> FunctionBreakerJava::extract_function_name_and_line_from_declaration(const vector<string> &file_content, int line_start_body_function, PROGRAMMING_LANGUAGE programming_language){
193 int pos = find_position_first_open_bracket(file_content[line_start_body_function]);
194 vector<Line_content> code_before_bracket = get_lines_before_body_function(file_content, line_start_body_function,pos);
195 vector<Line_content> code = remove_parameters_of_declaration(code_before_bracket, programming_language);
196 if(code.empty()){
197 return make_pair("",-1);
198 }
199 string ret = extract_last_token_of_string(code.back().content);
200 return {ret,code.back().line_number};
201}
202
203vector<string> FunctionBreakerJava::build_function_content(int start_number_line, int end_number_line,const vector<string> &file_content){
204 string first_line = file_content[start_number_line];
205 int to_remove = find_position_first_open_bracket(first_line);
206
207 vector<string> function_content;
208 reverse(first_line.begin(),first_line.end());
209 for(int i = 0; i < to_remove; i++){
210 first_line.pop_back();
211 }
212 reverse(first_line.begin(),first_line.end());
213 function_content.push_back(first_line);
214 for(int i = start_number_line+1; i <= end_number_line; i++){
215 function_content.push_back(file_content[i]);
216 }
217 return function_content;
218}
219
220vector<string> FunctionBreakerJava::build_header_content(int start_number_line, int line_declaration, string relative_path, string function_name, const vector<string> &file_content){
221 vector<string> function_content;
222 for(int i = line_declaration; i < start_number_line; i++){
223 function_content.push_back(file_content[i]);
224 }
225
226 string first_line = file_content[start_number_line];
227 int to_keep = find_position_first_open_bracket(first_line);
228 while(int(first_line.size()) > to_keep){
229 first_line.pop_back();
230 }
231 function_content.push_back(first_line);
232 return function_content;
233}
234
235bool FunctionBreakerJava::is_body_function_empty(int start_number_line, int end_number_line,const vector<string> &file_content){
236 vector<string> function_content = build_function_content(start_number_line, end_number_line, file_content);
237 int count_not_empty_char = 0;
238 for(auto line : function_content){
239 for(auto c : line){
240 if(!Utils::is_empty_char(c)){
241 count_not_empty_char++;
242 }
243 }
244 }
245 bool is_empty = count_not_empty_char <= 2;
246 return is_empty;
247}
248
249void FunctionBreakerJava::process_function(int start_number_line, int end_number_line, string relative_path, const vector<string> &file_content, PROGRAMMING_LANGUAGE programming_language){
250 string first_line = file_content[start_number_line];
251 auto [function_name,line_declaration] = extract_function_name_and_line_from_declaration(file_content,start_number_line, programming_language);
252 if(function_name.empty()){
253 return;
254 }
255 if(IGNORE_EMPTY_FUNCTIONS){
256 if(is_body_function_empty(start_number_line,end_number_line,file_content)){
257 return;
258 }
259 }
260
261 vector<string> function_content = build_function_content(start_number_line,end_number_line,file_content);
262 vector<string> header_content = build_header_content(start_number_line,line_declaration,relative_path,function_name,file_content);
263
264 create_source_file(start_number_line,end_number_line,relative_path,function_name,function_content);
265 create_header_file(relative_path, function_name, header_content);
266 create_info_file(line_declaration,start_number_line,end_number_line,relative_path,function_name);
267}
268
269string FunctionBreakerJava::file_path_from_folder_path(string file_path, string folder_path){
270 string ret = "";
271 for(size_t i = folder_path.size(); i < file_path.size(); i++){
272 ret += file_path[i];
273 }
274 return ret;
275}
276
277void FunctionBreakerJava::file_breaker_java(string file_path, string folder_path){
278 string relative_path = file_path_from_folder_path(file_path, folder_path);
279 vector<string> file_content = Utils::read_file_generic(file_path);
280 set<pair<int,int>> start_end_of_functions = find_start_end_of_brackets_of_given_depth(file_content, JAVA_RELEVANT_DEPTH);
281
282 for(auto [start_line, end_line] : start_end_of_functions){
283 process_function(start_line,end_line,relative_path, file_content, JAVA);
284 }
285}
286
287FunctionBreakerJava::FunctionBreakerJava(string file_path, string folder_path){
288 file_breaker_java(file_path, folder_path);
289}
FunctionBreakerJava(string file_path, string folder_path)
Constructs Java function breaker and processes file.
return ret
Definition sum.c:3
Java function parsing and extraction.
void create_info_file(int line_declaration, int start_number_line, int end_number_line, string relative_path, string function_name)
Creates JSON metadata file for a function.
void create_header_file(string relative_path, string function_name, const vector< string > &header_content)
Creates header file for a function.
void create_source_file(int start_number_line, int end_number_line, string relative_path, string function_name, const vector< string > &function_content)
Creates source file for a function.
PROGRAMMING_LANGUAGE
Enumeration of supported programming languages.
@ JAVA
Java programming language.
@ C
C programming language.
bool is_special_char(char c)
Checks if a character is special (non-alphanumeric and not underscore)
Definition utils.cpp:103
bool is_empty_char(char c)
Checks if a character is considered empty/whitespace.
Definition utils.cpp:93
vector< string > read_file_generic(string string_path)
Reads a file line by line into a vector of strings.
Definition utils.cpp:19
Structure representing a line of source code.
int line_number
Line number in the original file.
string content
Text content of the line.