arthur.bebou

Le site arthur.bebou.netlib.re - retour accueil

git clone git://bebou.netlib.re/arthur.bebou
Log | Files | Refs |

sub.py (2988B)


      1 #!/usr/bin/env python3
      2 import re
      3 import sys
      4 import time
      5 
      6 # Constants
      7 _NB_RUN = 10
      8 _PATTERN = re.compile(r"https://sws.geonames.org/(\d+)/about.rdf")
      9 # _PATH_TXT = "./data/input_big.xml"
     10 _PATH_TXT = "./A"
     11 _PATH_MAP = "corres"
     12 _PATH_RES = "output.xml"
     13 
     14 
     15 def build_map(data):
     16     """
     17     Build a mapping dictionary from a list of data.
     18 
     19     Args:
     20         data (list): A list of strings where each even-indexed element is a key
     21                      and each odd-indexed element is a value.
     22 
     23     Returns:
     24         dict: A dictionary mapping keys to values.
     25     """
     26     return {data[k][:-1]: data[k - 1][:-1] for k in range(1, len(data), 2)}
     27 
     28 
     29 def replace_match(match, mapping):
     30     """
     31     Replace a matched string using a mapping dictionary.
     32 
     33     Args:
     34         match (re.Match): A match object containing the matched string.
     35         mapping (dict): A dictionary mapping keys to replacement values.
     36 
     37     Returns:
     38         str: The replacement string.
     39     """
     40     key = match.group(1)
     41     return mapping.get(key, match.group(0))
     42 
     43 
     44 def read_file(file_path):
     45     """
     46     Read the content of a file.
     47 
     48     Args:
     49         file_path (str): The path to the file to be read.
     50 
     51     Returns:
     52         str: The content of the file as a string.
     53     """
     54     try:
     55         with open(file_path, "rb") as file:
     56             return file.read().decode("utf-8")
     57     except FileNotFoundError:
     58         sys.exit(f"Error: File '{file_path}' not found.")
     59     except Exception as e:
     60         sys.exit(f"Error reading file '{file_path}': {str(e)}")
     61 
     62 
     63 def write_file(file_path, content):
     64     """
     65     Write content to a file.
     66 
     67     Args:
     68         file_path (str): The path to the file to be written.
     69         content (str): The content to write to the file.
     70     """
     71     try:
     72         with open(file_path, "wb") as file:
     73             file.write(content.encode("utf-8"))
     74     except Exception as e:
     75         sys.exit(f"Error writing to file '{file_path}': {str(e)}")
     76 
     77 
     78 def process(input_file, map_file, pattern):
     79     """
     80     Process input data using a pattern and a mapping dictionary and write the
     81     result to an output file.
     82 
     83     Args:
     84         input_file (str): The path to the input file.
     85         map_file (str): The path to the map file.
     86         pattern (re.Pattern): A regular expression pattern to match.
     87 
     88     """
     89     txt = read_file(input_file)
     90     mapping = build_map(read_file(map_file))
     91     output = pattern.sub(lambda match: replace_match(match, mapping), txt)
     92     write_file(_PATH_RES, output)
     93 
     94 
     95 if __name__ == "__main__":
     96     times = []
     97     print(f"Starting batch of {_NB_RUN} runs...")
     98     for run, _ in enumerate(range(_NB_RUN)):
     99         start_time = time.time()
    100         process(_PATH_TXT, _PATH_MAP, _PATTERN)
    101         sys.stdout.write(f"\rRun {run + 1}/{_NB_RUN} completed")
    102         sys.stdout.flush()
    103         times.append(time.time() - start_time)
    104 
    105     print("\nExecution times:")
    106     print(f" - min: {min(times):.5f} sec")
    107     print(f" - avg: {sum(times) / _NB_RUN:.5f} sec")
    108     print(f" - max: {max(times):.5f} sec")