7d9a7fead18653a43645c3d673499cf639337406,lambdas/es/indexer/index.py,,handler,#Any#Any#,216

Before Change


            try:
                event_name = event_["eventName"]
                # only process these two event types
                if event_name not in [OBJECT_DELETE, OBJECT_PUT]:
                    continue
                bucket = unquote(event_["s3"]["bucket"]["name"])
                # In the grand tradition of IE6, S3 events turn spaces into "+"
                key = unquote_plus(event_["s3"]["object"]["key"])
                version_id = event_["s3"]["object"].get("versionId")
                version_id = unquote(version_id) if version_id else None
                # OBJECT_DELETE does not include "eTag"
                etag = unquote(event_["s3"]["object"].get("eTag", ""))

                # Get two levels of extensions to handle files like .csv.gz
                path = pathlib.PurePosixPath(key)
                ext1 = path.suffix
                ext2 = path.with_suffix("").suffix
                ext = (ext2 + ext1).lower()

                # Handle delete first and then continue so that
                # head_object and get_object (below) don't fail
                if event_name == OBJECT_DELETE:
                    batch_processor.append(
                        event_name,
                        bucket=bucket,
                        ext=ext,
                        etag=etag,
                        key=key,
                        last_modified=now_like_boto3(),
                        text="",
                        version_id=version_id
                    )
                    continue

                try:
                    head = retry_s3(
                        "head",
                        bucket,
                        key,
                        s3_client=s3_client,
                        version_id=version_id,
                        etag=etag
                    )
                except botocore.exceptions.ClientError as exception:
                    // "null" version sometimes results in 403s for buckets
                    // that have changed versioning, retry without it
                    if (exception.response.get("Error", {}).get("Code") == "403"
                            and version_id == "null"):
                        head = retry_s3(
                            "head",
                            bucket,
                            key,
                            s3_client=s3_client,
                            version_id=None,
                            etag=etag
                        )
                    else:
                        raise exception

                size = head["ContentLength"]
                last_modified = head["LastModified"]
                meta = head["Metadata"]

                try:
                    text = get_contents(
                        bucket,
                        key,
                        ext,
                        etag=etag,
                        version_id=version_id,
                        s3_client=s3_client,
                        size=size
                    )
                # we still want an entry for this document in elastic so that, e.g.,
                # the file counts from elastic are correct. re-raise below.
                except Exception as exc:  # pylint: disable=broad-except
                    text = ""
                    content_exception = exc
                    print("Content extraction failed", exc, bucket, key, etag, version_id)

                # decode Quilt-specific metadata
                if meta and "helium" in meta:
                    try:
                        decoded_helium = json.loads(meta["helium"])
                        meta["helium"] = decoded_helium or {}
                    except (KeyError, json.JSONDecodeError):
                        print("Unable to parse Quilt "helium" metadata", meta)

                batch_processor.append(
                    event_name,
                    bucket=bucket,
                    key=key,

After Change


            try:
                event_name = event_["eventName"]
                # Process all Create:* and Remove:* events
                if not any(event_name.startswith(n) for n in EVENT_PREFIX.values()):
                    continue
                bucket = unquote(event_["s3"]["bucket"]["name"])
                # In the grand tradition of IE6, S3 events turn spaces into "+"
                key = unquote_plus(event_["s3"]["object"]["key"])
                version_id = event_["s3"]["object"].get("versionId")
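
The After Change replaces the hard-coded [OBJECT_DELETE, OBJECT_PUT] check with a prefix match, so every ObjectCreated:* and ObjectRemoved:* S3 event is handled. A minimal sketch of that filter, assuming an EVENT_PREFIX mapping along these lines (the exact constants are defined elsewhere in index.py and are not shown in this excerpt):

    # Assumed mapping for illustration; not taken verbatim from the commit.
    EVENT_PREFIX = {
        "Created": "ObjectCreated:",
        "Removed": "ObjectRemoved:",
    }

    def should_index(event_name: str) -> bool:
        """True for any ObjectCreated:* or ObjectRemoved:* S3 event."""
        return any(event_name.startswith(prefix) for prefix in EVENT_PREFIX.values())

    print(should_index("ObjectCreated:Put"))                  # True
    print(should_index("ObjectRemoved:DeleteMarkerCreated"))  # True
    print(should_index("ObjectRestore:Completed"))            # False
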
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances


Project Name: quiltdata/quilt
Commit Name: 7d9a7fead18653a43645c3d673499cf639337406
Time: 2020-06-03
Author: akarve@users.noreply.github.com
File Name: lambdas/es/indexer/index.py
Class Name:
Method Name: handler


Project Name: IDSIA/sacred
Commit Name: 216e20a8ca8e53aa1da7538b242ce29e2d521d7c
Time: 2014-09-25
Author: klaus@idsia.ch
File Name: sacred/commands.py
Class Name:
Method Name: non_unicode_repr


Project Name: comic/grand-challenge.org
Commit Name: e357e1dbcdf36eb12be0bc5043e0a098f94a833a
Time: 2017-05-09
Author: jamesmeakin@gmail.com
File Name: django/comicsite/templatetags/comic_templatetags.py
Class Name: comic_URLNode
Method Name: render