Get Users With More Than One Account in the Same Source

It turns out the previous query I submitted won’t work as expected: it can produce false positives for any identity that has one account in your target source and another account in any other source. A better approach is to send a POST request to the /v3/search/aggregate endpoint with the following body:

{
    "query": {
        "query": "*"
    },
    "aggregationsDsl": {
        "accounts": {
            "nested": {
                "path": "accounts"
            },
            "aggs": {
                "source_id": {
                    "terms": {
                        "field": "accounts.source.id",
                        "min_doc_count": 2,
                        "size": 1000
                    },
                    "aggs": {
                        "identities": {
                            "terms": {
                                "field": "_uid",
                                "min_doc_count": 2
                            },
                            "aggs": {
                                "accounts": {
                                    "top_hits": {}
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

The response is a JSON object that contains the following "aggregations" object (truncated here):

{
    "aggregations": {
        "accounts": {
            "doc_count": 8,
            "source_id": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                    {
                        "key": "2c9180887671ff8c01767b4671fb7d5e",
                        "doc_count": 3,
                        "identities": {
                            "doc_count_error_upper_bound": 0,
                            "sum_other_doc_count": 0,
                            "buckets": [
                                {
                                    "key": "identity#2c918085771b670d01771c567e700917",
                                    "doc_count": 2,
                                    "accounts": {
                                        "hits": {
                                            "total": 2,
                                            "max_score": 4.1795163,
                                            "hits": [
                                                {
                                                    "_nested": {
                                                        "field": "accounts",
                                                        "offset": 1
                                                    },
                                                    "_score": 4.1795163,
                                                    "_source": {
                                                        "id": "2c9180897865dbf10178698380c5390f",
                                                        "name": "jack.ryan",
                                                        "accountId": "E003",
                                                        "source": {
                                                            "id": "2c9180887671ff8c01767b4671fb7d5e",
                                                            "name": "Employees",
                                                            "type": "DelimitedFile"
                                                        },
                                                        "disabled": false,
                                                        "locked": false,
                                                        "privileged": false,
                                                        "manuallyCorrelated": false,
                                                        "entitlementAttributes": {},
                                                        "created": "2021-03-25T13:12:09.157Z"
                                                    }
                                                },
                                                {
                                                    "_nested": {
                                                        "field": "accounts",
                                                        "offset": 0
                                                    },
                                                    "_score": 4.1795163,
                                                    "_source": {
                                                        "id": "2c9180897865dbf1017869837ca93909",
                                                        "name": "jack.ryan",
                                                        "accountId": "E001",
                                                        "source": {
                                                            "id": "2c9180887671ff8c01767b4671fb7d5e",
                                                            "name": "Employees",
                                                            "type": "DelimitedFile"
                                                        },
                                                        "disabled": false,
                                                        "locked": false,
                                                        "privileged": false,
                                                        "manuallyCorrelated": false,
                                                        "entitlementAttributes": {},
                                                        "created": "2021-03-25T13:12:08.105Z"
                                                    }
                                                }
                                            ]
                                        }
                                    }
                                }
                            ]
                        }
                    },
                    {
                        "key": "2c91808c771b686101772a91dbd877aa",
                        "doc_count": 3,
                        "identities": {
                            "doc_count_error_upper_bound": 0,
                            "sum_other_doc_count": 0,
                            "buckets": [
                                {
                                    "key": "identity#2c918085771b670d01771c567e650913",
                                    "doc_count": 2,
                                    "accounts": {
                                        "hits": {
                                            "total": 2,
                                            "max_score": 4.1795163,
                                            "hits": [
                                                {
                                                    "_nested": {
                                                        "field": "accounts",
                                                        "offset": 2
                                                    },
                                                    "_score": 4.1795163,
                                                    "_source": {
                                                        "id": "2c9180877865dbf201786a95806069dd",
                                                        "name": "sheena.martin",
                                                        "accountId": "E005",
                                                        "source": {
                                                            "id": "2c91808c771b686101772a91dbd877aa",
                                                            "name": "Ubuntu",
                                                            "type": "DelimitedFile"
                                                        },
                                                        "disabled": false,
                                                        "locked": false,
                                                        "privileged": false,
                                                        "manuallyCorrelated": false,
                                                        "entitlementAttributes": {},
                                                        "created": "2021-03-25T18:11:25.920Z"
                                                    }
                                                },
                                                {
                                                    "_nested": {
                                                        "field": "accounts",
                                                        "offset": 1
                                                    },
                                                    "_score": 4.1795163,
                                                    "_source": {
                                                        "id": "2c9180877865dbf201786a956c9b69cc",
                                                        "name": "sheena.martin",
                                                        "accountId": "E006",
                                                        "source": {
                                                            "id": "2c91808c771b686101772a91dbd877aa",
                                                            "name": "Ubuntu",
                                                            "type": "DelimitedFile"
                                                        },
                                                        "disabled": false,
                                                        "locked": false,
                                                        "privileged": false,
                                                        "manuallyCorrelated": false,
                                                        "entitlementAttributes": {},
                                                        "created": "2021-03-25T18:11:20.859Z"
                                                    }
                                                }
                                            ]
                                        }
                                    }
                                }
                            ]
                        }
                    },
                    {
                        "key": "2c91808475b4334b0175e1dff1b563a4",
                        "doc_count": 2,
                        "identities": {
                            "doc_count_error_upper_bound": 0,
                            "sum_other_doc_count": 0,
                            "buckets": []
                        }
                    }
                ]
            }
        }
    },
...
...
...

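This returns every identity that has duplicate accounts in any source. I can’t find a way to limit it to a single source, but it will point out the duplicates in all of the sources you have. In my environment, jack.ryan has duplicate accounts in the Employees source, and sheena.martin has duplicate accounts in the Ubuntu source. A source bucket whose "identities" "buckets" array is empty (like the last one above) contains more than one account overall, but no single identity owns two or more of them.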

7 Likes