Leveraging Python to Save KMS Costs When Using S3
Reducing KMS costs with S3 using Python
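The script below is a minimal sketch of the idea: using boto3 and pandas, it lists every bucket in the account, counts the objects in each one, asks S3 Storage Lens which buckets are read-heavy, and writes both results to an Excel workbook. It assumes a named AWS profile with permission to call S3, S3 Control (Storage Lens), and STS, and that the openpyxl package is installed for the Excel export; the profile and region placeholders need to be filled in before running it.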
import boto3
import pandas as pd

def get_s3_bucket_objects_count(s3_client, bucket_name):
    # list_objects_v2 returns at most 1,000 keys per call, so paginate to count every object
    paginator = s3_client.get_paginator('list_objects_v2')
    count = 0
    for page in paginator.paginate(Bucket=bucket_name):
        count += page.get('KeyCount', 0)
    return count

def get_all_s3_buckets(s3_client):
    # list_buckets returns every bucket owned by the account
    response = s3_client.list_buckets()
    return response['Buckets']

def get_read_heavy_buckets(storage_lens_client, account_id):
    # Ask S3 Storage Lens for this account's configurations and pull out any
    # bucket-level read-activity data they expose. The list call only returns
    # configuration summaries, so the nested fields below may be missing; the
    # guard simply returns an empty list in that case (detailed activity
    # metrics are normally consumed from the Storage Lens dashboard or export).
    read_heavy_buckets = []
    try:
        response = storage_lens_client.list_storage_lens_configurations(AccountId=account_id)
        configurations = response.get('StorageLensConfigurationList', [])
        for config in configurations:
            if config.get('LensType') == 'Organization':
                read_heavy_buckets.extend(
                    config['BucketLevel']['ActivityMetrics']['ReadIOBytes']['Bucket']
                )
    except KeyError:
        pass
    return read_heavy_buckets

# AWS configuration: fill in the profile and region the script should use
aws_profile = 'YOUR_PROFILE'
region_name = 'YOUR_REGION'

# Initialize AWS clients from the named profile so every call uses the same credentials
session = boto3.Session(profile_name=aws_profile, region_name=region_name)
s3_client = session.client('s3')
storage_lens_client = session.client('s3control')

# Get the AWS account ID (required by the Storage Lens API)
account_id = session.client('sts').get_caller_identity().get('Account')

# Get all S3 buckets in the account
buckets = get_all_s3_buckets(s3_client)

# Count the objects in each bucket
bucket_object_counts = {}
for bucket in buckets:
    bucket_name = bucket['Name']
    bucket_object_counts[bucket_name] = get_s3_bucket_objects_count(s3_client, bucket_name)

# Create a pandas DataFrame with bucket names and object counts
df = pd.DataFrame(list(bucket_object_counts.items()), columns=['Bucket Name', 'Object Count'])

# Save the DataFrame to an Excel file
output_file = 's3_bucket_objects.xlsx'
df.to_excel(output_file, index=False)

# Get the read-heavy buckets reported by S3 Storage Lens
read_heavy_buckets = get_read_heavy_buckets(storage_lens_client, account_id)

# Create a pandas DataFrame with the read-heavy bucket names
df_read_heavy = pd.DataFrame(read_heavy_buckets, columns=['Read-Heavy Buckets'])

# Append the read-heavy buckets to the same workbook on a separate sheet
with pd.ExcelWriter(output_file, engine='openpyxl', mode='a') as writer:
    df_read_heavy.to_excel(writer, sheet_name='Read-Heavy Buckets', index=False)
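
Running the script (with boto3, pandas, and openpyxl installed, e.g. pip install boto3 pandas openpyxl) produces s3_bucket_objects.xlsx with two sheets: one listing every bucket with its object count, and one listing the buckets Storage Lens flags as read-heavy. Buckets that score high on both are the ones where SSE-KMS generates the most KMS requests, since each object upload triggers a GenerateDataKey call and each download triggers a Decrypt call.

One common follow-up, not shown in the script above, is to enable S3 Bucket Keys on those buckets so that S3 reuses a bucket-level data key instead of calling KMS for every object. A minimal sketch, assuming bucket_name and kms_key_arn are placeholders you supply and that s3_client is the client created earlier:

# Enable default SSE-KMS encryption with a Bucket Key on a candidate bucket
s3_client.put_bucket_encryption(
    Bucket=bucket_name,
    ServerSideEncryptionConfiguration={
        'Rules': [
            {
                'ApplyServerSideEncryptionByDefault': {
                    'SSEAlgorithm': 'aws:kms',
                    'KMSMasterKeyID': kms_key_arn,
                },
                # Reuse a bucket-level data key to cut per-object KMS requests
                'BucketKeyEnabled': True,
            }
        ]
    },
)

Note that Bucket Keys only apply to objects written after the setting is enabled; existing objects keep their original encryption until they are rewritten.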