Le site arthur.bebou.netlib.re - retour accueil
git clone git://bebou.netlib.re/arthur.bebou
Log | Files | Refs |
sub.py (2988B)
#!/usr/bin/env python3
"""Benchmark a regex-based URL substitution driven by a lookup table.

The script reads an input text, replaces every URL matched by ``_PATTERN``
with the value found in a mapping built from a map file, writes the result
to an output file, and reports min/avg/max wall-clock time over several runs.
"""
import re
import sys
import time

# Constants
_NB_RUN = 10
# Dots are escaped so that only literal "." characters match; group 1 captures
# the numeric identifier used as the lookup key.
_PATTERN = re.compile(r"https://sws\.geonames\.org/(\d+)/about\.rdf")
# Alternative input kept for reference: "./data/input_big.xml"
_PATH_TXT = "./A"
_PATH_MAP = "corres"
_PATH_RES = "output.xml"


def build_map(data):
    """
    Build a mapping dictionary from a list of lines.

    Lines are consumed in consecutive pairs: the first line of each pair
    (even index) becomes the value and the second line (odd index) becomes
    the key. A trailing unpaired line is ignored. When a key appears more
    than once, the last occurrence wins.

    NOTE(review): an earlier docstring described the opposite order (even =
    key, odd = value); the order documented here is what the code has always
    done. Confirm against the actual map file format.

    Args:
        data (list): A list of strings, with or without trailing line
            terminators (e.g. the result of ``readlines()`` or
            ``splitlines()``).

    Returns:
        dict: A dictionary mapping keys to values.
    """
    # Strip only line terminators: slicing with [:-1] would eat a real
    # character when the final line has no trailing newline.
    lines = [line.rstrip("\r\n") for line in data]
    return dict(zip(lines[1::2], lines[0::2]))


def replace_match(match, mapping):
    """
    Replace a matched string using a mapping dictionary.

    Args:
        match (re.Match): A match object whose group 1 is the lookup key.
        mapping (dict): A dictionary mapping keys to replacement values.

    Returns:
        str: The mapped replacement, or the full matched text unchanged when
        the key is not present in the mapping.
    """
    return mapping.get(match.group(1), match.group(0))


def read_file(file_path):
    """
    Read the content of a UTF-8 encoded file.

    The file is read as bytes and decoded explicitly so that line endings
    are returned exactly as stored (no newline translation).

    Args:
        file_path (str): The path to the file to be read.

    Returns:
        str: The content of the file as a string.

    Raises:
        SystemExit: If the file is missing, unreadable, or not valid UTF-8.
    """
    try:
        with open(file_path, "rb") as file:
            return file.read().decode("utf-8")
    except FileNotFoundError:
        sys.exit(f"Error: File '{file_path}' not found.")
    except (OSError, UnicodeDecodeError) as e:
        sys.exit(f"Error reading file '{file_path}': {str(e)}")


def write_file(file_path, content):
    """
    Write content to a file, encoded as UTF-8.

    The content is encoded explicitly and written as bytes so that line
    endings are stored exactly as given (no newline translation).

    Args:
        file_path (str): The path to the file to be written.
        content (str): The content to write to the file.

    Raises:
        SystemExit: If the file cannot be written or the content cannot be
            encoded as UTF-8.
    """
    try:
        with open(file_path, "wb") as file:
            file.write(content.encode("utf-8"))
    except (OSError, UnicodeEncodeError) as e:
        sys.exit(f"Error writing to file '{file_path}': {str(e)}")


def process(input_file, map_file, pattern, output_file=_PATH_RES):
    """
    Process input data using a pattern and a mapping dictionary and write the
    result to an output file.

    Args:
        input_file (str): The path to the input file.
        map_file (str): The path to the map file (pairs of lines, see
            ``build_map``).
        pattern (re.Pattern): A regular expression pattern to match; its
            group 1 is used as the lookup key.
        output_file (str): The path of the file to write. Defaults to
            ``_PATH_RES``.
    """
    txt = read_file(input_file)
    # read_file() returns one string; build_map() needs a list of lines.
    # Passing the raw string would make it pair up single characters.
    mapping = build_map(read_file(map_file).splitlines())
    output = pattern.sub(lambda match: replace_match(match, mapping), txt)
    write_file(output_file, output)


def main():
    """Run the substitution ``_NB_RUN`` times and print timing statistics."""
    times = []
    print(f"Starting batch of {_NB_RUN} runs...")
    for run in range(_NB_RUN):
        start_time = time.perf_counter()
        process(_PATH_TXT, _PATH_MAP, _PATTERN)
        # Record the elapsed time before printing so that terminal output is
        # not part of the measurement.
        times.append(time.perf_counter() - start_time)
        sys.stdout.write(f"\rRun {run + 1}/{_NB_RUN} completed")
        sys.stdout.flush()

    print("\nExecution times:")
    print(f" - min: {min(times):.5f} sec")
    print(f" - avg: {sum(times) / len(times):.5f} sec")
    print(f" - max: {max(times):.5f} sec")


if __name__ == "__main__":
    main()