# Source code for wtgseal.utils

"""Utilities for wtg-seal.

This module offers utilities function for wtg-seal, mainly focused on
file parsing.

"""

from typing import Counter, Generator, List, TextIO  # noqa


def parse_documents(file: TextIO, /) -> Generator[List[str], None, None]:
    """Parse a text file containing document definitions into lists.

    Read a file describing web documents and generate, for each one,
    the list of URIs of the files that compose it.

    Parameters
    ----------
    file : {TextIO}
        A file handler to the file to be parsed. The file should
        contain one or more lines, each being a space-separated list
        of integers. Each line represents a document to be retrieved
        from a web server; a document is made of one or more files,
        given by the integers.

    Yields
    ------
    List[str]
        The representation of the next document, *i.e.* the URIs of
        the files that compose the document.

    Notes
    -----
    Ideally, `file` is the file `objout.txt` generated by the program
    `objects`, part of SURGE [1]_.

    References
    ----------
    .. [1] Barford, P., & Crovella, M. (1998, June). Generating
       representative web workloads for network and server performance
       evaluation. In *Proceedings of the 1998 ACM SIGMETRICS joint
       international conference on Measurement and modeling of
       computer systems* (pp. 151-160).

    """
    for record in file:
        # One document per line; each integer token maps to a flat
        # "/<n>.txt" URI on the server.
        yield ['/' + token + '.txt' for token in record.split()]
def parse_requests(file: TextIO, /) -> Generator[int, None, None]:
    """Parse a text file containing a sequence of document requests.

    Read a file containing a sequence of document identifiers that
    were to be requested from a web server.

    Parameters
    ----------
    file : {TextIO}
        A file handler to the file to be parsed. The file should
        contain one or more lines with an integer in each of them.

    Yields
    ------
    int
        The index of the next document to be requested.

    Notes
    -----
    Ideally, `file` is the file `name.txt` generated by the program
    `lru`, part of SURGE [1]_.

    References
    ----------
    .. [1] Barford, P., & Crovella, M. (1998, June). Generating
       representative web workloads for network and server performance
       evaluation. In *Proceedings of the 1998 ACM SIGMETRICS joint
       international conference on Measurement and modeling of
       computer systems* (pp. 151-160).

    """
    # int() tolerates surrounding whitespace, so the trailing newline
    # on each line needs no explicit stripping.
    yield from (int(line) for line in file)
def count_requests(file: TextIO, /) -> Counter:
    """Count the number of requests for each document.

    Count how many times each document is requested, based on a given
    sequence of document requests read from `file`.

    Parameters
    ----------
    file : {TextIO}
        A file containing a sequence of integers, one per line, each
        representing the index of a document in a web server.

    Returns
    -------
    Counter
        A `collections.Counter` with the request frequency of each
        document.

    See Also
    --------
    parse_requests

    """
    # Feed the parsed indexes straight into Counter, which tallies
    # one occurrence per requested document.
    return Counter(int(line) for line in file)
def calc_weights(freqs: Counter) -> Counter:
    """Calculate weights for locust tasks.

    Calculate locust task weights based on the frequency with which
    each document was requested, as given by `freqs`. Each frequency
    is divided by the smallest frequency and rounded, so the least
    requested document gets weight 1.

    Parameters
    ----------
    freqs : {Counter}
        The number of requests for each document. Counts are assumed
        to be positive (as produced by counting requests); a zero
        count would raise ``ZeroDivisionError``, as before.

    Returns
    -------
    Counter
        The weight for each document when mapped as a locust task.
        An empty `freqs` yields an empty `Counter`.

    """
    weights = Counter(freqs)
    if not freqs:
        # Guard: the original minimum lookup would raise IndexError
        # on an empty Counter; an empty input has no weights.
        return weights
    # The smallest count normalizes all others; min() avoids the full
    # descending sort that most_common() performs.
    least = min(freqs.values())
    for key in weights:
        weights[key] = round(weights[key] / least)
    return weights