Source code for pattern_clustering.cli

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# This file is part of the pattern-clustering project.
# https://github.com/nokia/pattern-clustering

"""Console script for pattern_clustering."""

__author__     = "Marc-Olivier Buob, Maxime Raynal"
__maintainer__ = "Marc-Olivier Buob, Maxime Raynal"
__email__      = "marc-olivier.buob@nokia-bell-labs.com, maxime.raynal@nokia.com"
__copyright__  = "Copyright (C) 2022, Nokia"
__license__    = "BSD-3"

import json
import sys
from collections import defaultdict
from optparse import OptionParser
from pattern_clustering import (
    MAP_NAME_RE, PatternClusteringEnv, pattern_clustering_to_html,
    pattern_clustering_with_preprocess, pattern_clustering_without_preprocess,
    pattern_distance
)


def message(s: str) -> None:
    """Write ``s`` followed by a newline to the standard error stream.

    Args:
        s: The text to display.
    """
    # Diagnostics go to stderr so that stdout only carries results.
    print(s, file=sys.stderr)
def warning(s: str) -> None:
    """Report ``s`` on the standard error stream with a ``[WARNING]`` prefix.

    Args:
        s: The warning text.
    """
    text = f"[WARNING] {s}"
    message(text)
def error(s: str):
    """Report ``s`` with an ``[ERROR]`` prefix, then terminate the program.

    The message is written through :func:`message` and the process
    exits with status ``1``; this function never returns normally.

    Args:
        s: The error text.

    Raises:
        SystemExit: Always, with exit code ``1``.
    """
    text = f"[ERROR] {s}"
    message(text)
    sys.exit(1)
def info(s: str) -> None:
    """Report ``s`` on the standard error stream with an ``[INFO]`` prefix.

    Args:
        s: The informational text.
    """
    text = f"[INFO] {s}"
    message(text)
def main_pattern_clustering_mkconf():
    """Console script for ``pattern-clustering-mkconf``.

    Prints a JSON configuration template to the standard output. The
    template holds a ``"threshold"`` entry set to ``0.6`` and a
    ``"patterns"`` entry set to ``MAP_NAME_RE``.
    """
    default_conf = {
        "threshold": 0.6,
        "patterns": MAP_NAME_RE,
    }
    rendered = json.dumps(default_conf, indent=4)
    print(rendered)
def main_pattern_distance():
    """Console script for ``pattern-distance``.

    Parses the command line, optionally overrides the pattern collection
    from a JSON configuration file, and prints the distance returned by
    ``pattern_distance`` for the two positional arguments.

    Command line:
        ``pattern-distance [options] ARG1 ARG2``

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: With status ``2`` (raised by ``OptionParser.error``)
            when fewer than two positional arguments are supplied.
    """
    parser = OptionParser(usage="usage: %prog [options] ARG1 ARG2")
    parser.add_option(
        "-c", "--config",
        metavar="CONFIG_FILENAME",
        type="str",
        dest="config_filename",
        help="Path to the JSON configuration file. Supersedes command-line parameters.",
        default=None
    )
    parser.add_option(
        "-n", "--normalized",
        dest="normalized",
        help="Returns a distance between 0.0 and 1.0",
        action="store_true"
    )
    parser.add_option(
        "-v", "--verbose",
        dest="verbose",
        help="Verbose mode",
        action="store_true"
    )
    (options, args) = parser.parse_args()

    # ARG1 and ARG2 are mandatory: fail with a usage message instead of
    # an IndexError when they are missing.
    if len(args) < 2:
        parser.error("two positional arguments are required: ARG1 ARG2")
    w1, w2 = args[:2]

    # Only the arguments beyond the first two are actually ignored.
    extra_args = args[2:]
    if extra_args:
        warning(f"Ignored argument: {extra_args}")

    # Load parameters
    verbose = options.verbose
    if options.config_filename:
        if verbose:
            info(
                f"Loading parameters from {options.config_filename} configuration file."
                " Command-line parameters are superseded by those specified in the "
                "configuration file."
            )
        with open(options.config_filename) as f_conf:
            conf = json.load(f_conf)
        patterns = conf.get("patterns", None)
        if patterns:
            # The pattern collection is shared through the class attribute.
            PatternClusteringEnv.patterns = patterns
    elif verbose:
        info("Using command-line and default parameters.")

    normalized = options.normalized
    if verbose:
        info(f"Comparing\nw1: {w1}\nw2: {w2}\nnormalized: {normalized}")
    print(pattern_distance(w1, w2, normalized=normalized))
    return 0
def main_pattern_clustering():
    """Console script for ``pattern-clustering``.

    Reads the input lines (from the file passed with ``-i`` or, when that
    option is omitted, from the standard input), clusters them with
    ``pattern_clustering_with_preprocess`` or
    ``pattern_clustering_without_preprocess``, and writes the result as
    JSON to ``-o`` or to the standard output. With ``-H``, an HTML
    rendering is written as well.

    When a configuration file is given with ``-c``, its ``"patterns"``,
    ``"threshold"`` and ``"no_async"`` entries take precedence over the
    corresponding command-line values.

    Returns:
        ``0`` on success.
    """
    parser = OptionParser(usage="usage: %prog [options] INPUT_FILENAME")
    parser.add_option(
        "-c", "--config",
        metavar="CONFIG_FILENAME",
        type="str",
        dest="config_filename",
        help="Path to the JSON configuration file. Supersedes command-line parameters.",
        default=None
    )
    parser.add_option(
        "-H", "--html-file",
        metavar="OUTPUT_FILENAME",
        type="str",
        dest="html_output_filename",
        help="Path to the output HTML file used for human readable results.",
        default=None
    )
    parser.add_option(
        "-i", "--input-file",
        metavar="INPUT_FILENAME",
        type="str",
        dest="input_filename",
        help="Path to the input (log) file. Defaults to the standard input.",
        # None means "read the standard input"; a stream object cannot be
        # used as default because the value is later treated as a path.
        default=None
    )
    parser.add_option(
        "-n", "--no-async",
        dest="no_async",
        help="Disable asynchronous computations.",
        action="store_true"
    )
    parser.add_option(
        "-o", "--output-file",
        metavar="OUTPUT_FILENAME",
        type="str",
        dest="output_filename",
        help="Path to the JSON output file. Defaults to the standard output.",
        default=None
    )
    parser.add_option(
        "-p", "--with-preprocessing",
        dest="with_preprocessing",
        help=(
            "Automatically groups in the same cluster every line having the same "
            "pattern-level structure. This accelerates the computation but may "
            "lead to inaccurate clusters."
        ),
        action="store_true"
    )
    parser.add_option(
        "-t", "--threshold",
        metavar="THRESHOLD",
        type="float",
        dest="threshold",
        help=(
            "Threshold value used by the clustering, between 0.0 and 1.0. "
            "The lower the value, the smaller the clusters. Defaults to 0.6"
        ),
        default=0.6
    )
    parser.add_option(
        "-v", "--verbose",
        dest="verbose",
        help="Verbose mode",
        action="store_true"
    )
    (options, args) = parser.parse_args()

    # NOTE(review): the usage string advertises a positional INPUT_FILENAME,
    # but positional arguments are ignored; the input path comes from -i.
    if args:
        warning(f"Ignored argument: {args}")

    # Load parameters
    verbose = options.verbose
    threshold = options.threshold
    no_async = options.no_async
    if options.config_filename:
        if verbose:
            info(
                f"Loading parameters from {options.config_filename} configuration file."
                " Command-line parameters are superseded by those specified in the "
                "configuration file."
            )
        with open(options.config_filename) as f_conf:
            conf = json.load(f_conf)
        patterns = conf.get("patterns", None)
        if patterns:
            # The pattern collection is shared through the class attribute.
            PatternClusteringEnv.patterns = patterns
        threshold = conf.get("threshold", threshold)
        no_async = conf.get("no_async", no_async)
    elif verbose:
        info("Using command-line and default parameters.")

    # Use the merged value so that a "no_async" entry of the configuration
    # file is honored, not only the command-line flag.
    use_async = not no_async
    pattern_clustering = (
        pattern_clustering_with_preprocess if options.with_preprocessing
        else pattern_clustering_without_preprocess
    )

    # Verbose messages
    if verbose:
        info(f"threshold: {threshold}")
        info(f"env:\n{PatternClusteringEnv()}")
        info(f"pattern_clustering: {pattern_clustering}")

    # Read the input lines, stripped of surrounding whitespace.
    if options.input_filename is None:
        lines = [line.strip() for line in sys.stdin]
    else:
        with open(options.input_filename) as f_in:
            lines = [line.strip() for line in f_in]

    map_row_cluster = pattern_clustering(
        lines,
        max_dist=threshold,
        use_async=use_async
    )

    # JSON output: file if requested, standard output otherwise.
    if options.output_filename:
        if verbose:
            info(f"Writting results to {options.output_filename}")
        with open(options.output_filename, "w") as f_out:
            json.dump(map_row_cluster, fp=f_out)
    else:
        print(json.dumps(map_row_cluster))

    # Optional human-readable HTML output.
    if options.html_output_filename:
        with open(options.html_output_filename, "w") as f_html:
            print(
                f"<html><body>{pattern_clustering_to_html(lines, map_row_cluster)}</body></html>",
                file=f_html
            )
    return 0
def main():
    """Default entry point; delegates to :func:`main_pattern_clustering`.

    Returns:
        The value returned by :func:`main_pattern_clustering`.
    """
    # Required by sphinx
    return main_pattern_clustering()  # pragma: no cover
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())  # pragma: no cover