
Before Change


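    # Patch SparkSession.__init__ (via the locally defined __init__ wrapper)
    # so sessions created after this call also trigger listener registration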
    wrap_patch(SparkSession, "__init__", __init__)

    active_session = _get_active_spark_session()
    if active_session is not None:
        # We know SparkContext exists here already, so get it
        sc = SparkContext.getOrCreate()

        _listen_for_spark_activity(sc)


def _get_repl_id():
    """
    Get a unique REPL ID for a PythonSubscriber instance. This is used to distinguish between
    REPLs in multitenant, REPL-aware environments where multiple Python processes may share the
    same Spark JVM.
    """
After Change


def autolog():
    """Implementation of Spark datasource autologging"""
    global _spark_table_info_listener
    if _get_current_listener() is None:
        active_session = _get_active_spark_session()
        if active_session is None:
            raise MlflowException(
                "No active SparkContext found, refusing to enable Spark datasource "
                "autologging. Please create a SparkSession e.g. via "
                "SparkSession.builder.getOrCreate() (see API docs at "
                "https://spark.apache.org/docs/latest/api/python/"
                "pyspark.sql.html//pyspark.sql.SparkSession) "
                "before attempting to enable autologging"
            )
        # We know SparkContext exists here already, so get it
        sc = SparkContext.getOrCreate()
        if _get_spark_major_version(sc) < 3:
            raise MlflowException("Spark autologging unsupported for Spark versions < 3")
        gw = active_session.sparkContext._gateway
        params = gw.callback_server_parameters
        callback_server_params = CallbackServerParameters(
            address=params.address,
            port=params.port,
            daemonize=True,
            daemonize_connections=True,
            eager_load=params.eager_load,
            ssl_context=params.ssl_context,
            accept_timeout=params.accept_timeout,
            read_timeout=params.read_timeout,
            auth_token=params.auth_token,
        )
        gw.start_callback_server(callback_server_params)

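        # Initialize the JVM-side publisher, then register this process's
        # PythonSubscriber to receive datasource-read events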
        event_publisher = _get_jvm_event_publisher()
        try:
            event_publisher.init(1)
            _spark_table_info_listener = PythonSubscriber()
            _spark_table_info_listener.register()
        except Exception as e:
            raise MlflowException(
                "Exception while attempting to initialize JVM-side state for "
                "Spark datasource autologging. Please ensure you have the "
                "mlflow-spark JAR attached to your Spark session as described "
                "in http://mlflow.org/docs/latest/tracking.html//"
                "automatic-logging-from-spark-experimental. Exception:\n%s" % e
            )

        # Register context provider for Spark autologging
        from mlflow.tracking.context.registry import _run_context_provider_registry

        _run_context_provider_registry.register(SparkAutologgingContext)


def _get_repl_id():
    """
    Get a unique REPL ID for a PythonSubscriber instance. This is used to distinguish between
    REPLs in multitenant, REPL-aware environments where multiple Python processes may share the
    same Spark JVM.
    """
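
A minimal usage sketch for the refactored flow, assuming mlflow and pyspark are installed; mlflow.spark.autolog() is the public entry point that delegates to the module-level autolog() above:

from pyspark.sql import SparkSession

import mlflow.spark

# The new precondition: an active SparkSession must exist up front,
# otherwise autolog() raises MlflowException instead of deferring setup.
spark = SparkSession.builder.getOrCreate()

# Requires the mlflow-spark JAR to be attached to the Spark session,
# per the exception message in autolog() above.
mlflow.spark.autolog()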
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 7

Instances


Project Name: mlflow/mlflow
Commit Name: 96342cc6b3f9424cd48b746e72d790b883aad958
Time: 2020-10-20
Author: 39497902+dbczumar@users.noreply.github.com
File Name: mlflow/_spark_autologging.py
Class Name:
Method Name: autolog


Project Name: mlflow/mlflow
Commit Name: 9abf61cedb2d84b200eec76d8e036d8c92243e94
Time: 2020-07-13
Author: hkawamura0130@gmail.com
File Name: mlflow/store/model_registry/sqlalchemy_store.py
Class Name: SqlAlchemyStore
Method Name: transition_model_version_stage


Project Name: mlflow/mlflow
Commit Name: 4f9fc9b8698c84f7a7281a2692657a2f1c1368d6
Time: 2020-06-30
Author: 52183359+ankitmathur-db@users.noreply.github.com
File Name: mlflow/utils/search_utils.py
Class Name: SearchUtils
Method Name: _parse_order_by_string