Hello Community,
I have been asked by HR to put together a report showing user IDs that are mismatched across our different connected systems. I got the report working, but it takes over 4 hours to run. I am wondering if anyone has ideas on how I can improve the timing and get it down to maybe 1 to 2 hours.
Python Code:
import functools
import requests
import pandas as pd
import json
import os
from datetime import datetime
def getBearerToken(clientId, clientSecret, baseUrl):
    token = requests.post(baseUrl + "/oauth/token?grant_type=client_credentials&client_id=" + clientId + "&client_secret=" + clientSecret)
    return token
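#Side suggestion (mine, not part of the original script): OAuth token endpoints generally
#accept the credentials as a form-encoded POST body, which keeps the secret out of URL and
#proxy logs. A sketch of the equivalent call:
#token = requests.post(baseUrl + "/oauth/token", data={"grant_type": "client_credentials", "client_id": clientId, "client_secret": clientSecret})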
#Get the bearer token that will be passed when making other api calls
tenant = "REMOVED"
baseUrl = "https://" + tenant +".api.identitynow.com"
clientId = "REMOVED"
clientSec = "REMOVED"
#Get the access token to use later on.
token = getBearerToken(clientId, clientSec, baseUrl)
#Convert the response object into a JSON object
jsontoken = token.json()
#Put the access token into a variable for later use
bearerToken = jsontoken['access_token']
#Set payload and header variables to be passed to api calls.
payload = {}
headers = {
    'Accept': 'application/json',
    'Authorization': 'Bearer ' + bearerToken
}
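#Suggestion (mine, not measured against this tenant): create one requests.Session and reuse
#it for every call below so the TLS connection is pooled instead of being re-established on
#each request; the loops would then call session.get(...) instead of requests.request("GET", ...).
session = requests.Session()
session.headers.update(headers)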
#Time to run 1 sec
#set variables to be called later
offset = 0
apiUrl = baseUrl + "/v3/accounts?offset=" + str(offset) +"&limit=250&count=true&filters=sourceId eq \"2c918085838b347501839f3379be57a1\""
#Make first api call to grab accounts
workdayJsonData = requests.request("GET", apiUrl, headers=headers, data=payload)
#convert to Json object
workdayResponseJsonData = workdayJsonData.json()
#get the total number of account records
numberOfRecords = int(workdayJsonData.headers['X-Total-Count'])
#build list to store all the api responses
workdayAccounts = []
#add first api response to the list
workdayAccounts.extend(workdayResponseJsonData)
#loop through all the pages to collect all the user accounts regardless of user's status
while offset < numberOfRecords:
    offset += 250
    apiUrl = baseUrl + "/v3/accounts?offset=" + str(offset) + "&limit=250&count=true&filters=sourceId eq \"2c918085838b347501839f3379be57a1\""
    response = requests.request("GET", apiUrl, headers=headers, data=payload)
    workdayAccounts.extend(response.json())
#Time to run 2392
#This section is to pull Active Directory accounts in ISC
adOffset = 0
adApiUrl = baseUrl + "/v3/accounts?offset=" + str(adOffset) +"&limit=250&count=true&filters=sourceId eq \"2c918084838b2da60183a89af84e5324\""
adJsonData = requests.request("GET", adApiUrl, headers=headers, data=payload)
adResponseData = adJsonData.json()
adNumberOfRecords = int(adJsonData.headers['X-Total-Count'])
adAccounts = []
adAccounts.extend(adResponseData)
while adOffset < adNumberOfRecords:
    adOffset += 250
    adApiUrl = baseUrl + "/v3/accounts?offset=" + str(adOffset) + "&limit=250&count=true&filters=sourceId eq \"2c918084838b2da60183a89af84e5324\""
    response = requests.request("GET", adApiUrl, headers=headers, data=payload)
    adAccounts.extend(response.json())
#Time to run 2286
#This section is to pull ServiceNow accounts in ISC
snowOffset = 0
snowApiUrl = baseUrl + "/v3/accounts?offset=" + str(snowOffset) +"&limit=250&count=true&filters=sourceId eq \"2c91808784149ed901841a9ade4169cd\" and uncorrelated eq false"
snowJsonData = requests.request("GET", snowApiUrl, headers=headers, data=payload)
snowResponseData = snowJsonData.json()
snowNumberOfRecords = int(snowJsonData.headers['X-Total-Count'])
print(snowNumberOfRecords)
snowAccounts = []
snowAccounts.extend(snowResponseData)
while snowOffset < snowNumberOfRecords:
    snowOffset += 250
    snowApiUrl = baseUrl + "/v3/accounts?offset=" + str(snowOffset) + "&limit=250&count=true&filters=sourceId eq \"2c91808784149ed901841a9ade4169cd\" and uncorrelated eq false"
    response = requests.request("GET", snowApiUrl, headers=headers, data=payload)
    snowAccounts.extend(response.json())
#Time to run 7999
#This section is to pull NERM accounts in ISC
nermOffset = 0
nermApiUrl = baseUrl + "/v3/accounts?offset=" + str(nermOffset) +"&limit=250&count=true&filters=sourceId eq \"2c918084838b2da601839f98aba93dba\""
nermJsonData = requests.request("GET", nermApiUrl, headers=headers, data=payload)
nermResponseData = nermJsonData.json()
nermNumberOfRecords = int(nermJsonData.headers['X-Total-Count'])
nermAccounts = []
nermAccounts.extend(nermResponseData)
while nermOffset < nermNumberOfRecords:
    nermOffset += 250
    nermApiUrl = baseUrl + "/v3/accounts?offset=" + str(nermOffset) + "&limit=250&count=true&filters=sourceId eq \"2c918084838b2da601839f98aba93dba\""
    response = requests.request("GET", nermApiUrl, headers=headers, data=payload)
    nermAccounts.extend(response.json())
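#Sketch (my suggestion, not the original approach): the four sections above are the same
#pagination loop with a different sourceId, and each page waits on the previous one. Since
#X-Total-Count is known after the first call, every remaining offset is known up front and
#the pages can be fetched in parallel. max_workers=8 is a guess; the safe ceiling depends
#on the tenant's API rate limits.
import concurrent.futures

def fetchAccounts(sourceId, extraFilter=""):
    filters = 'sourceId eq "' + sourceId + '"' + extraFilter
    params = {"offset": 0, "limit": 250, "count": "true", "filters": filters}
    #First page also returns the total record count in the X-Total-Count header
    first = requests.get(baseUrl + "/v3/accounts", headers=headers, params=params)
    total = int(first.headers['X-Total-Count'])
    accounts = list(first.json())
    def fetchPage(offset):
        #count=false on follow-up pages spares the server recounting every time
        pageParams = dict(params, offset=offset, count="false")
        return requests.get(baseUrl + "/v3/accounts", headers=headers, params=pageParams).json()
    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as pool:
        for page in pool.map(fetchPage, range(250, total, 250)):
            accounts.extend(page)
    return accounts
#Example usage with the same source IDs as above:
#workdayAccounts = fetchAccounts("2c918085838b347501839f3379be57a1")
#snowAccounts = fetchAccounts("2c91808784149ed901841a9ade4169cd", " and uncorrelated eq false")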
#Build Base Report from the 4 DataFrames
workdayDataFrame = pd.json_normalize(workdayAccounts)
adDataFrame = pd.json_normalize(adAccounts)
snowDataFrame = pd.json_normalize(snowAccounts)
nermDataFrame = pd.json_normalize(nermAccounts)
workdayReindexed = workdayDataFrame.reindex(columns=['identityId', 'cloudLifecycleState', 'identity.name', 'attributes.USERID','attributes.FILENUMBER'])
adReindexed = adDataFrame.reindex(columns=['identityId', 'cloudLifecycleState', 'identity.name', 'attributes.newSamAccountName'])
snowReindexed = snowDataFrame.reindex(columns=['identityId', 'cloudLifecycleState', 'identity.name', 'attributes.user_name'])
nermReindexed = nermDataFrame.reindex(columns=['identityId', 'cloudLifecycleState', 'identity.name', 'attributes.uid', 'attributes.sailpoint_username_ne_attribute'])
#framesToMerge = [workdayReindexed, adReindexed, snowReindexed, nermReindexed]
#baseReport = functools.reduce(lambda left,right: pd.merge(left,right,on=['identityId'],how='outer'), framesToMerge)
baseReport = workdayReindexed.merge(adReindexed,on=['identityId','identity.name','cloudLifecycleState'],how='left') \
    .merge(snowReindexed,on=['identityId','identity.name','cloudLifecycleState'],how='left') \
    .merge(nermReindexed,on=['identityId','identity.name','cloudLifecycleState'],how='left')
renamedHeadersBaseReport = baseReport.rename(columns={'identityId': "SailPointUID", 'cloudLifecycleState': "CloudLifeCycleState", 'identity.name': "UserDisplayName", 'attributes.USERID': "WorkdaySamAccountName", 'attributes.FILENUMBER': "WorkdayEEID", 'attributes.uid': "SecZettaEEID", 'attributes.newSamAccountName': "ADSamAccountName", 'attributes.user_name': "ServiceNowSamAccountName", 'attributes.sailpoint_username_ne_attribute': "SecZettaSamAccountName"})
#Assign the column selection back so the reordering is not silently discarded
renamedHeadersBaseReport = renamedHeadersBaseReport[["SailPointUID", "CloudLifeCycleState", "UserDisplayName", "WorkdayEEID", "SecZettaEEID", "WorkdaySamAccountName", "ADSamAccountName", "ServiceNowSamAccountName", "SecZettaSamAccountName"]]
collectionDataFrame = []
matchingCollectionDataFrame = []
for index, row in renamedHeadersBaseReport.iterrows():
    if not str(row["WorkdaySamAccountName"]).lower() == str(row["ADSamAccountName"]).lower() == str(row["ServiceNowSamAccountName"]).lower():
        collectionDataFrame.append(row)
    else:
        matchingCollectionDataFrame.append(row)
collection = pd.DataFrame(collectionDataFrame)
matchingCollection = pd.DataFrame(matchingCollectionDataFrame)
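#Sketch (my addition; the Fast-suffixed names are hypothetical): iterrows pays Python-loop
#overhead per identity, so on tens of thousands of rows the split above is a likely hotspot.
#Comparing whole columns at once should yield the same two frames far faster.
wdNames = renamedHeadersBaseReport["WorkdaySamAccountName"].astype(str).str.lower()
adNames = renamedHeadersBaseReport["ADSamAccountName"].astype(str).str.lower()
snowNames = renamedHeadersBaseReport["ServiceNowSamAccountName"].astype(str).str.lower()
allThreeMatch = (wdNames == adNames) & (adNames == snowNames)
collectionFast = renamedHeadersBaseReport[~allThreeMatch]          #should equal collection
matchingCollectionFast = renamedHeadersBaseReport[allThreeMatch]   #should equal matchingCollection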
finalFilter = collection.reset_index()
matchingFinalFilter = matchingCollection.reset_index()
finalReport = finalFilter[["SailPointUID", "CloudLifeCycleState", "UserDisplayName", "WorkdayEEID", "SecZettaEEID","WorkdaySamAccountName", "ADSamAccountName", "ServiceNowSamAccountName", "SecZettaSamAccountName"]]
matchingFinalReport = matchingFinalFilter[["SailPointUID", "CloudLifeCycleState", "UserDisplayName", "WorkdayEEID", "SecZettaEEID","WorkdaySamAccountName", "ADSamAccountName", "ServiceNowSamAccountName", "SecZettaSamAccountName"]]
datetimenow = datetime.now()
filename = "MisMatched_Final_Report_" + str(datetimenow.year) + '-' + str(datetimenow.month) + '-' + str(datetimenow.day)
matchingFileName = "Matching_Final_Report" + str(datetimenow.year) + '-' + str(datetimenow.month) + '-' + str(datetimenow.day)
finalReport.reset_index().to_csv(os.environ['USERPROFILE'] + '\\downloads\\' + filename + '.csv', index=False)
matchingFinalReport.reset_index().to_csv(os.environ['USERPROFILE'] + '\\downloads\\' + matchingFileName + '.csv', index=False)
fullFilterData = []
for index, row in finalFilter.iterrows():
    if not row['CloudLifeCycleState'] == 'inactive' and not pd.isna(row['ADSamAccountName']):
        fullFilterData.append(row)
fullFilterDataFrame = pd.DataFrame(fullFilterData)
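#Sketch (my addition; fullFilterFast is a hypothetical name): the same active/has-AD-account
#filter as the loop above, done as one vectorized mask instead of per-row iteration.
activeWithAd = (finalFilter['CloudLifeCycleState'] != 'inactive') & finalFilter['ADSamAccountName'].notna()
fullFilterFast = finalFilter[activeWithAd]    #should equal fullFilterDataFrame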
filternonmatchingFileName = "filternonmatching_" + str(datetimenow.year) + '-' + str(datetimenow.month) + '-' + str(datetimenow.day)
fullFilterDataFrame.reset_index().to_csv(os.environ['USERPROFILE'] + '\\downloads\\' + filternonmatchingFileName + '.csv', index=False)