Arkanjo 0.1
A tool for finding duplicated functions in codebases
Loading...
Searching...
No Matches
run_action.py
Go to the documentation of this file.
1#!/usr/bin/env python
2
3import os
4import sys
5import json
6import requests
7import argparse
8
9import duplicate_code_detection
10
11WARNING_SUFFIX = " ⚠️"
12
13
def make_markdown_table(array, max_characters=65000):
    """Render a list of rows as a Markdown table.

    Args:
        array: List of rows, each row being a list of cells. The first row
            is used as the table header. Every cell is converted with str().
        max_characters: Size limit for the rendered table. As soon as the
            table grows beyond it, the remaining rows are dropped and a
            warning note is appended instead.

    Returns:
        String to put into a .md file.

    Example input:
        [["Name", "Age", "Height"],
         ["Jake", 20, "5'10"],
         ["Mary", 21, "5'7"]]

    Adopted from: https://gist.github.com/m0neysha/219bad4b02d2008e0154
    """
    header = array[0]

    # Header row followed by the separator row.
    parts = ["\n| "]
    parts.extend(" " + str(cell) + " |" for cell in header)
    parts.append("\n")
    parts.append("|" + "-------------- | " * len(header) + "\n")

    # Track the actual size of the table rendered so far. Adding the length
    # of the whole table on every row (as was done before) makes the counter
    # grow quadratically and truncates the report far too early.
    rendered_characters = sum(len(part) for part in parts)
    for entry in array[1:]:
        row = "| " + "".join(str(cell) + " | " for cell in entry) + "\n"
        parts.append(row)
        rendered_characters += len(row)
        if rendered_characters > max_characters:
            parts.append("\n" + WARNING_SUFFIX + " ")
            parts.append("Results were omitted because the report was too large. ")
            parts.append(
                "Please consider ignoring results below a certain threshold.\n"
            )
            break

    return "".join(parts) + "\n"
54
55
def get_markdown_link(file, url):
    """Return a Markdown link whose label is `file` and whose target is `url` followed by `file`."""
    return "[{0}]({1}{0})".format(file, url)
58
59
def get_warning(similarity, warn_threshold):
    """Return the similarity as text, flagged with the warning suffix when it reaches the threshold.

    `warn_threshold` is converted with int() before the comparison; values
    strictly below it are returned without the suffix.
    """
    if similarity < int(warn_threshold):
        return str(similarity)
    return str(similarity) + WARNING_SUFFIX
66
67
def similarities_to_markdown(similarities, url_prefix, warn_threshold):
    """Build the Markdown report, one collapsible section per checked file.

    `similarities` maps each checked file to a mapping of other files and
    their similarity value. Every section holds a table of those files,
    ordered from the most to the least similar.
    """
    sections = []
    for checked_file, matches in similarities.items():
        sections.append("<details><summary>%s</summary>\n\n" % checked_file)
        sections.append("### 📄 %s\n" % get_markdown_link(checked_file, url_prefix))

        rows = []
        for other_file, score in matches.items():
            rows.append(
                [
                    get_markdown_link(other_file, url_prefix),
                    get_warning(score, warn_threshold),
                ]
            )

        # The similarity cell may carry the warning suffix; strip it to
        # recover the number used for ordering.
        def numeric_similarity(row):
            return float(row[1].replace(WARNING_SUFFIX, ""))

        rows.sort(key=numeric_similarity, reverse=True)

        sections.append(make_markdown_table([["File", "Similarity (%)"]] + rows))
        sections.append("</details>\n")

    return "".join(sections)
92
93
def split_and_trim(input_list):
    """Split a comma-separated string and strip surrounding whitespace from every token."""
    tokens = input_list.split(",")
    return list(map(str.strip, tokens))
96
97
def to_absolute_path(paths):
    """Return a list with the absolute form of every path in `paths`."""
    return [os.path.abspath(path) for path in paths]
100
101
def main():
    """Run the duplicate code detection and publish the report on a pull request.

    Command line arguments:
        --latest-head: commit hash or branch used to build links to files.
        --pull-request-id: id of the pull request that receives the comment.

    Environment variables read:
        INPUT_FAIL_ABOVE, INPUT_DIRECTORIES, INPUT_IGNORE_DIRECTORIES,
        INPUT_PROJECT_ROOT_DIR, INPUT_FILE_EXTENSIONS, INPUT_IGNORE_BELOW,
        INPUT_ONLY_CODE, INPUT_WARN_ABOVE, INPUT_HEADER_MESSAGE_START,
        INPUT_GITHUB_TOKEN, INPUT_ONE_COMMENT, GITHUB_REPOSITORY and
        GITHUB_API_URL.

    Side effects:
        Posts (or updates) a comment through the GitHub API and writes the
        same report to "message.md" in the current directory.

    Returns:
        The value of the return code reported by the detection run.
    """
    parser = argparse.ArgumentParser(
        description="Duplicate code detection action runner"
    )
    parser.add_argument(
        "--latest-head",
        type=str,
        default="master",
        help="The latest commit hash or branch",
    )
    parser.add_argument(
        "--pull-request-id", type=str, required=True, help="The pull request id"
    )
    args = parser.parse_args()

    truthy_values = ("true", "1")

    fail_threshold = os.environ.get("INPUT_FAIL_ABOVE")
    directories = os.environ.get("INPUT_DIRECTORIES")
    ignore_directories = os.environ.get("INPUT_IGNORE_DIRECTORIES")
    project_root_dir = os.environ.get("INPUT_PROJECT_ROOT_DIR")
    file_extensions = os.environ.get("INPUT_FILE_EXTENSIONS")
    ignore_threshold = os.environ.get("INPUT_IGNORE_BELOW")
    # bool() of any non-empty string is True, so "false" would enable the
    # option; parse the flag the same way INPUT_ONE_COMMENT is parsed below.
    only_code = (os.environ.get("INPUT_ONLY_CODE") or "").strip().lower() in (
        truthy_values
    )

    directories_list = to_absolute_path(split_and_trim(directories))
    # Treat an unset variable like an empty one instead of crashing on None.
    ignore_directories_list = to_absolute_path(
        split_and_trim(ignore_directories) if ignore_directories else []
    )
    file_extensions_list = split_and_trim(file_extensions)
    project_root_dir = os.path.abspath(project_root_dir)

    files_list = None
    ignore_files_list = None
    json_output = True
    csv_output_path = ""  # No CSV output by default for now in GitHub Actions
    show_loc = False

    detection_result, code_similarity = duplicate_code_detection.run(
        int(fail_threshold),
        directories_list,
        files_list,
        ignore_directories_list,
        ignore_files_list,
        json_output,
        project_root_dir,
        file_extensions_list,
        int(ignore_threshold),
        only_code,
        csv_output_path,
        show_loc,
    )

    if detection_result == duplicate_code_detection.ReturnCode.BAD_INPUT:
        print("Action aborted due to bad user input")
        return detection_result.value
    elif detection_result == duplicate_code_detection.ReturnCode.THRESHOLD_EXCEEDED:
        # Not an early return: the report is still published below.
        print(
            "Action failed due to maximum similarity threshold exceeded, check the report"
        )

    repo = os.environ.get("GITHUB_REPOSITORY")
    files_url_prefix = "https://github.com/%s/blob/%s/" % (repo, args.latest_head)
    warn_threshold = os.environ.get("INPUT_WARN_ABOVE")

    header_message_start = os.environ.get("INPUT_HEADER_MESSAGE_START") + "\n"
    message = header_message_start
    message += "The [tool](https://github.com/platisd/duplicate-code-detection-tool)"
    message += " analyzed your source code and found the following degree of"
    message += " similarity between the files:\n"
    message += similarities_to_markdown(
        code_similarity, files_url_prefix, warn_threshold
    )

    github_token = os.environ.get("INPUT_GITHUB_TOKEN")
    github_api_url = os.environ.get("GITHUB_API_URL")

    request_url = "%s/repos/%s/issues/%s/comments" % (
        github_api_url,
        repo,
        args.pull_request_id,
    )

    headers = {
        "Authorization": "token %s" % github_token,
    }
    report = {"body": message}

    update_existing_comment = (
        os.environ.get("INPUT_ONE_COMMENT", "false").lower() in truthy_values
    )
    comment_updated = False
    if update_existing_comment:
        comments_response = requests.get(request_url, headers=headers)
        pr_comments = comments_response.json()
        if not isinstance(pr_comments, list):
            # A non-list payload cannot be searched for a previous report;
            # fall back to posting a new comment.
            print(
                "Fetching existing comments failed with code: "
                + str(comments_response.status_code)
            )
            print(comments_response.text)
            pr_comments = []

        # If the bot has posted many comments, update the last one
        for pr_comment in reversed(pr_comments):
            if (pr_comment.get("body") or "").startswith(header_message_start):
                update_result = requests.patch(
                    pr_comment["url"],
                    json=report,
                    headers=headers,
                )
                if update_result.status_code != 200:
                    print(
                        "Updating existing comment failed with code: "
                        + str(update_result.status_code)
                    )
                    print(update_result.text)
                    print("Attempting to post a new comment instead")
                else:
                    comment_updated = True
                break

    if not comment_updated:
        post_result = requests.post(
            request_url,
            json=report,
            headers=headers,
        )

        if post_result.status_code != 201:
            print(
                "Posting results to GitHub failed with code: "
                + str(post_result.status_code)
            )
            print(post_result.text)

    # The report contains emoji; do not depend on the locale's default encoding.
    with open("message.md", "w", encoding="utf-8") as f:
        f.write(message)

    return detection_result.value
234
235
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main())
run(fail_threshold, directories, files, ignore_directories, ignore_files, json_output, project_root_dir, file_extensions, ignore_threshold, only_code, csv_output, show_loc)
to_absolute_path(paths)
Definition run_action.py:98
get_markdown_link(file, url)
Definition run_action.py:56
make_markdown_table(array)
Definition run_action.py:14
similarities_to_markdown(similarities, url_prefix, warn_threshold)
Definition run_action.py:68
split_and_trim(input_list)
Definition run_action.py:94
get_warning(similarity, warn_threshold)
Definition run_action.py:60