import pandas as pd
from minio import Minio
import io
import json
from datetime import datetime
# Module-level MinIO client used by process_inventory() as its default store.
# NOTE(review): the endpoint and both keys are hard-coded in source, and
# secure=False presumably means plain HTTP rather than TLS — confirm this is
# intended before running outside a trusted network.
client = Minio(
    "10.20.30.200:9000",
    access_key="YOUR_ACCESS_KEY",
    secret_key="YOUR_SECRET_KEY",
    secure=False,
)
def process_inventory(
    *,
    minio_client=None,
    source_bucket="devops",
    target_bucket="main-data",
):
    """Summarise the newest inventory report and upload the result as JSON.

    Lists every object in ``source_bucket``, picks the ``.parquet`` object
    whose name sorts last, sums ``object_count`` and ``total_bytes`` per
    ``prefix``, and stores the aggregated rows in ``target_bucket`` under
    ``stats/daily_summary_<YYYY-MM-DD>.json``.

    Any exception is caught and reported on stdout, so the function never
    raises; it always returns ``None``.

    Args:
        minio_client: Object exposing ``list_objects``, ``get_object`` and
            ``put_object``. Defaults to the module-level ``client``.
        source_bucket: Bucket scanned for ``.parquet`` reports.
        target_bucket: Bucket that receives the JSON summary.
    """
    try:
        # Resolved inside the try block so a missing module-level client is
        # reported like any other failure instead of escaping the function.
        store = client if minio_client is None else minio_client

        listing = store.list_objects(source_bucket, recursive=True)
        # "Latest" means the lexicographically greatest object name; max()
        # finds it without sorting the full listing.
        latest_report = max(
            (
                obj.object_name
                for obj in listing
                if obj.object_name.endswith('.parquet')
            ),
            default=None,
        )
        if latest_report is None:
            print(f"No inventory files found in '{source_bucket}' bucket.")
            return

        print(f"Processing report: {latest_report}")

        response = store.get_object(source_bucket, latest_report)
        try:
            payload = response.read()
        finally:
            # Always hand the HTTP connection back to the pool, even when the
            # read fails; otherwise the connection is leaked.
            response.close()
            response.release_conn()

        df = pd.read_parquet(io.BytesIO(payload))
        stats = df.groupby('prefix').agg({
            'object_count': 'sum',
            'total_bytes': 'sum',
        }).reset_index()

        # Local (naive) date of the run; it becomes part of the object name.
        today = datetime.now().strftime('%Y-%m-%d')
        result_json = stats.to_json(orient='records', indent=4, force_ascii=False)
        result_bytes = result_json.encode('utf-8')
        target_path = f"stats/daily_summary_{today}.json"

        store.put_object(
            target_bucket,
            target_path,
            io.BytesIO(result_bytes),
            len(result_bytes),
            content_type="application/json",
        )

        print(f"Success! Statistics saved to {target_path}")
        print(stats.head())
    except Exception as e:
        # Script-level boundary: report the failure instead of crashing.
        print(f"Error occurred: {e}")
# Run the inventory summary only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    process_inventory()