"""Extract prompt/response pairs from an ``api.log`` file and print them as JSON.

Usage::

    python parse_api_log.py [path/to/api.log]

When no path is given, ``DEFAULT_LOG_FILE`` is used.
"""
# Provenance: reconstructed from patch e3e075e432de45494459e109e80d3595159d8d3d
# (josc146, Sun, 4 Feb 2024 19:30:47 +0800): "add parse_api_log.py, this script
# can extract formatted data from api.log".

import json
import sys
from bisect import bisect_right

# Log file read when no command-line argument is supplied.
DEFAULT_LOG_FILE = 'D:\\RWKV_Runner\\api.log'


def _find_line(lines, start, predicate):
    """Return the index of the first line at or after *start* matching *predicate*, or -1."""
    for idx in range(start, len(lines)):
        if predicate(lines[idx]):
            return idx
    return -1


def _is_info_header(line):
    """Return True for a line whose stripped text ends with ``- INFO``."""
    return line.strip().endswith('- INFO')


def _is_completions_url(line):
    """Return True for a ``Url:`` line whose stripped text ends with ``/completions``."""
    return line.startswith('Url:') and line.strip().endswith("/completions")


def _drop_prefix_histories(entries):
    """Return the entries whose text is not a strict prefix of another entry's text.

    An entry's text is ``prompt + response``.  Entries with identical text are
    all kept.  Input order is preserved.
    """
    texts = [entry['prompt'] + entry['response'] for entry in entries]
    # In sorted order every string that extends ``text`` sits directly after
    # ``text``, so inspecting the single next distinct string is sufficient.
    ordered = sorted(set(texts))
    kept = []
    for entry, text in zip(entries, texts):
        successor = bisect_right(ordered, text)
        if successor < len(ordered) and ordered[successor].startswith(text):
            continue
        kept.append(entry)
    return kept


def extract_data(log_file, skip_prefix=None):
    """Collect ``{'prompt': ..., 'response': ...}`` dicts from *log_file*.

    For every line starting with ``Generation Prompt:`` the function looks,
    in this order, for:

    1. the next line ending in ``- INFO`` -- the lines in between form the prompt;
    2. the next ``Url:`` line ending in ``/completions``;
    3. the next ``Data:`` line;
    4. the next line containing ``RequestsNum: `` -- the lines between the
       ``Data:`` line and this one form the response.

    If any of these is missing the prompt is ignored.  Prompt and response
    have trailing whitespace removed.

    Args:
        log_file: Path of the UTF-8 encoded log file.
        skip_prefix: Optional string; a prompt whose first line starts with
            it is ignored.  ``None`` or ``""`` disables the filter.
            NOTE(review): the previous code compared against an empty string,
            which matches every line and therefore rejected every prompt;
            presumably the intended marker was lost -- confirm and pass it here.

    Returns:
        A list of dicts with the keys ``'prompt'`` and ``'response'``, in file order.

    Raises:
        OSError: If *log_file* cannot be opened.
    """
    with open(log_file, 'r', encoding="utf-8") as file:
        lines = file.readlines()

    entries = []
    for i, line in enumerate(lines):
        if not line.startswith('Generation Prompt:'):
            continue
        # Bounds check: the marker may be the very last line of the file.
        if skip_prefix and i + 1 < len(lines) and lines[i + 1].startswith(skip_prefix):
            continue

        info_idx = _find_line(lines, i + 1, _is_info_header)
        if info_idx < 0:
            continue
        url_idx = _find_line(lines, info_idx + 1, _is_completions_url)
        if url_idx < 0:
            continue
        data_idx = _find_line(lines, url_idx + 1, lambda s: s.startswith('Data:'))
        if data_idx < 0:
            continue
        end_idx = _find_line(lines, data_idx + 1, lambda s: "RequestsNum: " in s)
        if end_idx < 0:
            continue

        entries.append({
            'prompt': "".join(lines[i + 1:info_idx]).rstrip(),
            # Text on the 'Data:' line itself is not part of the response.
            'response': "".join(lines[data_idx + 1:end_idx]).rstrip(),
        })
    return entries


def main():
    """Print the extracted entries as indented JSON on standard output.

    The log path is taken from the first command-line argument and falls back
    to ``DEFAULT_LOG_FILE``.  Entries whose ``prompt + response`` text is a
    strict prefix of another entry's text are omitted (presumably these are
    earlier turns of a conversation that a later entry already contains --
    verify against real logs).  This filtering previously ran only when the
    optional third-party ``cyac`` trie was installed; it now uses the standard
    library so the output does not depend on the environment.
    """
    log_file = DEFAULT_LOG_FILE if len(sys.argv) < 2 else sys.argv[1]
    histories = _drop_prefix_histories(extract_data(log_file))
    # ensure_ascii=False keeps non-ASCII text readable while the output stays
    # valid JSON; the former unicode_escape round-trip also un-escaped "\n"
    # and '\"' inside strings and broke characters outside the BMP.
    print(json.dumps(histories, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()