Arkanjo 0.2
A tool for finding duplicated functions in codebases
Loading...
Searching...
No Matches
function_breaker_java.cpp
Go to the documentation of this file.
2
3set<array<int, 3>> FunctionBreakerJava::find_start_end_and_depth_of_brackets(const vector<string>& brackets_content) {
4 set<array<int, 3>> start_ends;
5 int open_brackets = 0;
6
7 vector<int> not_processed_open_brackets;
8 auto process_open = [&](int line_number) {
9 open_brackets++;
10 not_processed_open_brackets.push_back(line_number);
11 };
12 auto process_close = [&](int line_number) {
13 open_brackets--;
14 if (open_brackets <= -1) {
15 open_brackets = 0;
16 } else {
17 int matched_open_position = not_processed_open_brackets.back();
18 not_processed_open_brackets.pop_back();
19 int depth_of_open = not_processed_open_brackets.size();
20 start_ends.insert({matched_open_position, line_number, depth_of_open});
21 }
22 };
23
24 for (size_t i = 0; i < brackets_content.size(); i++) {
25 auto line = brackets_content[i];
26 for (auto c : line) {
27 if (c == '{') {
28 process_open(i);
29 }
30 if (c == '}') {
31 process_close(i);
32 }
33 }
34 }
35 return start_ends;
36}
37
38set<pair<int, int>> FunctionBreakerJava::find_start_end_of_brackets_of_given_depth(vector<string> brackets_content, int depth) {
39 set<pair<int, int>> ret;
40 set<array<int, 3>> bracket_pairs = find_start_end_and_depth_of_brackets(brackets_content);
41 for (auto [start, end, dep] : bracket_pairs) {
42 if (dep == depth) {
43 ret.insert({start, end});
44 }
45 }
46 return ret;
47}
48
49int FunctionBreakerJava::find_position_first_open_bracket(const string& s) {
50 for (size_t i = 0; i < s.size(); i++) {
51 char c = s[i];
52 if (c == '{') {
53 return i;
54 }
55 }
56 return -1;
57}
58
59string FunctionBreakerJava::extract_last_token_of_string(const string& s) {
60 vector<string> tokens;
61 string cur_token = "";
62 for (size_t i = 0; i < s.size(); i++) {
63 char c = s[i];
65 if (!cur_token.empty()) {
66 tokens.push_back(cur_token);
67 }
68 cur_token = "";
69 } else {
70 cur_token += c;
71 }
72 }
73 if (!cur_token.empty()) {
74 tokens.push_back(cur_token);
75 }
76
77 if (tokens.empty()) {
78 return "";
79 }
80 return tokens.back();
81}
82
83Line_content FunctionBreakerJava::build_line_code(int line_number, const string& content) {
84 Line_content ret;
85 ret.line_number = line_number;
86 ret.content = content;
87 return ret;
88}
89
90vector<Line_content> FunctionBreakerJava::get_lines_before_body_function(const vector<string>& file_content, int line_start_body_function, int pos_bracket) {
91 vector<Line_content> ret;
92 Line_content line_bracket = build_line_code(line_start_body_function, file_content[line_start_body_function]);
93 // remove everything after {
94 while (int(line_bracket.content.size()) > pos_bracket) {
95 line_bracket.content.pop_back();
96 }
97 ret.push_back(line_bracket);
98
99 int until = max(0, line_start_body_function - NUMBER_OF_LINES_BEFORE_FOR_FUNCTION_NAME);
100 for (int i = line_start_body_function - 1; i >= until; i--) {
101 ret.push_back(build_line_code(i, file_content[i]));
102 }
103 reverse(ret.begin(), ret.end());
104
105 // remove empty lines/empty characters at the end of lines
106 while (!ret.empty()) {
107 if (ret.back().content.empty()) {
108 ret.pop_back();
109 continue;
110 }
111 if (Utils::is_empty_char(ret.back().content.back())) {
112 ret.back().content.pop_back();
113 continue;
114 }
115 break;
116 }
117 return ret;
118}
119
120vector<Line_content> FunctionBreakerJava::remove_parenteses_at_the_end_of_the_scope(vector<Line_content> code) {
121 if (code.empty() || code.back().content.back() != ')') {
122 return code;
123 }
124 int count_close_parenteses = 0;
125
126 while (!code.empty()) {
127 string content = code.back().content;
128 while (!content.empty()) {
129 if (content.back() == ')') {
130 count_close_parenteses++;
131 }
132 if (content.back() == '(') {
133 count_close_parenteses--;
134 }
135 content.pop_back();
136 if (count_close_parenteses == 0) {
137 break;
138 }
139 }
140 code.back().content = content;
141 if (count_close_parenteses == 0) {
142 break;
143 }
144 code.pop_back();
145 }
146 return code;
147}
148
149vector<Line_content> FunctionBreakerJava::remove_content_until_find_parenteses_at_the_end(vector<Line_content> code) {
150 while (!code.empty()) {
151 string content = code.back().content;
152 while (!content.empty()) {
153 if (content.back() == ')') {
154 break;
155 } else {
156 content.pop_back();
157 }
158 }
159 code.back().content = content;
160 if (!content.empty()) {
161 break;
162 }
163 code.pop_back();
164 }
165 return code;
166}
167
168vector<Line_content> FunctionBreakerJava::remove_parameters_of_declaration_c(vector<Line_content> code) {
169 if (!ALLOW_STRUCTS) {
170 auto ret = remove_content_until_find_parenteses_at_the_end(code);
171 return remove_parenteses_at_the_end_of_the_scope(ret);
172 }
173 return remove_parenteses_at_the_end_of_the_scope(code);
174}
175
176vector<Line_content> FunctionBreakerJava::remove_parameters_of_declaration_java(vector<Line_content> code) {
177 auto ret = remove_content_until_find_parenteses_at_the_end(code);
178 return remove_parenteses_at_the_end_of_the_scope(ret);
179}
180
181vector<Line_content> FunctionBreakerJava::remove_parameters_of_declaration(vector<Line_content> code, PROGRAMMING_LANGUAGE programming_language) {
182 if (programming_language == C) {
183 return remove_parameters_of_declaration_c(code);
184 }
185 if (programming_language == JAVA) {
186 return remove_parameters_of_declaration_java(code);
187 }
188 return code;
189}
190
191pair<string, int> FunctionBreakerJava::extract_function_name_and_line_from_declaration(const vector<string>& file_content, int line_start_body_function, PROGRAMMING_LANGUAGE programming_language) {
192 int pos = find_position_first_open_bracket(file_content[line_start_body_function]);
193 vector<Line_content> code_before_bracket = get_lines_before_body_function(file_content, line_start_body_function, pos);
194 vector<Line_content> code = remove_parameters_of_declaration(code_before_bracket, programming_language);
195 if (code.empty()) {
196 return make_pair("", -1);
197 }
198 string ret = extract_last_token_of_string(code.back().content);
199 return {ret, code.back().line_number};
200}
201
202vector<string> FunctionBreakerJava::build_function_content(int start_number_line, int end_number_line, const vector<string>& file_content) {
203 string first_line = file_content[start_number_line];
204 int to_remove = find_position_first_open_bracket(first_line);
205
206 vector<string> function_content;
207 reverse(first_line.begin(), first_line.end());
208 for (int i = 0; i < to_remove; i++) {
209 first_line.pop_back();
210 }
211 reverse(first_line.begin(), first_line.end());
212 function_content.push_back(first_line);
213 for (int i = start_number_line + 1; i <= end_number_line; i++) {
214 function_content.push_back(file_content[i]);
215 }
216 return function_content;
217}
218
219vector<string> FunctionBreakerJava::build_header_content(int start_number_line, int line_declaration, const fs::path& relative_path, const string& function_name, const vector<string>& file_content) {
220 vector<string> function_content;
221 for (int i = line_declaration; i < start_number_line; i++) {
222 function_content.push_back(file_content[i]);
223 }
224
225 string first_line = file_content[start_number_line];
226 int to_keep = find_position_first_open_bracket(first_line);
227 while (int(first_line.size()) > to_keep) {
228 first_line.pop_back();
229 }
230 function_content.push_back(first_line);
231 return function_content;
232}
233
234bool FunctionBreakerJava::is_body_function_empty(int start_number_line, int end_number_line, const vector<string>& file_content) {
235 vector<string> function_content = build_function_content(start_number_line, end_number_line, file_content);
236 int count_not_empty_char = 0;
237 for (auto line : function_content) {
238 for (auto c : line) {
239 if (!Utils::is_empty_char(c)) {
240 count_not_empty_char++;
241 }
242 }
243 }
244 bool is_empty = count_not_empty_char <= 2;
245 return is_empty;
246}
247
248void FunctionBreakerJava::process_function(int start_number_line, int end_number_line, const fs::path& relative_path, const vector<string>& file_content, PROGRAMMING_LANGUAGE programming_language) {
249 string first_line = file_content[start_number_line];
250 auto [function_name, line_declaration] = extract_function_name_and_line_from_declaration(file_content, start_number_line, programming_language);
251 if (function_name.empty()) {
252 return;
253 }
254 if (IGNORE_EMPTY_FUNCTIONS) {
255 if (is_body_function_empty(start_number_line, end_number_line, file_content)) {
256 return;
257 }
258 }
259
260 vector<string> function_content = build_function_content(start_number_line, end_number_line, file_content);
261 vector<string> header_content = build_header_content(start_number_line, line_declaration, relative_path, function_name, file_content);
262
263 create_source_file(start_number_line, end_number_line, relative_path, function_name, function_content);
264 create_header_file(relative_path, function_name, header_content);
265 create_info_file(line_declaration, start_number_line, end_number_line, relative_path, function_name);
266}
267
268fs::path FunctionBreakerJava::file_path_from_folder_path(const fs::path& file_path, const fs::path& folder_path) {
269 return fs::relative(file_path, folder_path);
270}
271
272void FunctionBreakerJava::file_breaker_java(const fs::path& file_path, const fs::path& folder_path) {
273 fs::path relative_path = file_path_from_folder_path(file_path, folder_path);
274 vector<string> file_content = Utils::read_file_generic(file_path);
275 set<pair<int, int>> start_end_of_functions = find_start_end_of_brackets_of_given_depth(file_content, JAVA_RELEVANT_DEPTH);
276
277 for (auto [start_line, end_line] : start_end_of_functions) {
278 process_function(start_line, end_line, relative_path, file_content, JAVA);
279 }
280}
281
282FunctionBreakerJava::FunctionBreakerJava(const fs::path& file_path, const fs::path& folder_path) {
283 file_breaker_java(file_path, folder_path);
284}
FunctionBreakerJava(const fs::path &file_path, const fs::path &folder_path)
Constructs Java function breaker and processes file.
Java function parsing and extraction.
void create_source_file(int start_number_line, int end_number_line, const fs::path &relative_path, const string &function_name, const vector< string > &function_content)
Creates source file for a function.
void create_info_file(int line_declaration, int start_number_line, int end_number_line, const fs::path &relative_path, const string &function_name)
Creates JSON metadata file for a function.
void create_header_file(const fs::path &relative_path, const string &function_name, const vector< string > &header_content)
Creates header file for a function.
PROGRAMMING_LANGUAGE
Enumeration of supported programming languages.
@ JAVA
Java programming language.
@ C
C programming language.
bool is_special_char(char c)
Checks if a character is special (non-alphanumeric and not underscore)
Definition utils.cpp:66
bool is_empty_char(char c)
Checks if a character is considered empty/whitespace.
Definition utils.cpp:56
std::vector< std::string > read_file_generic(const fs::path &string_path)
Reads a file line by line into a vector of strings.
Definition utils.cpp:13
Structure representing a line of source code.
int line_number
Line number in the original file.
string content
Text content of the line.