PronounsPage/server/cleanupImages.ts

258 lines
7.6 KiB
TypeScript

import './setup.ts';
import { S3 } from '@aws-sdk/client-s3';
import type { ListObjectsV2Output, ObjectIdentifier, _Object } from '@aws-sdk/client-s3';
import { awsConfig, awsParams } from './aws.ts';
import dbConnection from './db.ts';
import type { Profile } from '~/src/profile.ts';
// Age threshold in milliseconds: objects modified more recently than this are
// counted as "fresh" by the cleanup and are never considered for removal.
const FRESH_CUTOFF = 1000 * 60 * 60 * 6; // 6 hours

// Deletion requests are only sent when the EXECUTE environment variable is
// exactly '1'; with any other value the script only reports what it would remove.
const execute = process.env.EXECUTE === '1';
if (execute) {
    console.log('WILL REMOVE FILES!');
} else {
    console.log('Dry run');
}
/**
 * Sanity-checks a collected set of ids before it is trusted as a "keep" list.
 *
 * Logs the number of ids, then verifies that there are more than
 * `expectedMinimumSize` of them and that (almost) all of them have the expected
 * format: exactly 26 characters, each an uppercase letter or a digit.
 *
 * @param type label used in the log output
 * @param data object whose keys are the ids to check (the values are ignored)
 * @param expectedMinimumSize the key count must be strictly greater than this
 * @throws Error when there are too few ids, or more than 10 malformed ones
 */
const validateIds = (type: string, data: Record<string, boolean>, expectedMinimumSize: number = 0): void => {
    const ids = Object.keys(data);
    console.log(`${type}: ${ids.length}`);

    if (ids.length <= expectedMinimumSize) {
        throw new Error('List of ids has unexpectedly few items!');
    }

    const wellFormed = /^[A-Z0-9]{26}$/;
    const invalidKeys = ids.filter((id) => !wellFormed.test(id));
    if (invalidKeys.length === 0) {
        return;
    }

    // a handful of malformed ids is tolerated (and reported); more than 10 is fatal
    console.warn(`${invalidKeys.length} invalid ${type} keys: ${invalidKeys.join(', ')}`);
    if (invalidKeys.length > 10) {
        throw new Error('Too many invalid keys');
    }
};
/** Running totals accumulated across the S3 sweeps and printed in the final summary. */
interface CleanupStats {
    overall: number;
    fresh: number;
    removed: number;
    removedSize: number;
    awsRequests: number;
}

/** Outcome of inspecting a single S3 object key. */
type KeyVerdict =
    | { action: 'keep' }
    | { action: 'remove'; reason?: string }
    | { action: 'unrecognised' };

/**
 * Lists every object under `prefix`, page by page, and removes the ones that
 * `judge` marks for removal.
 *
 * - Objects modified within the last `FRESH_CUTOFF` milliseconds are counted as fresh and skipped.
 * - Keys that `judge` does not recognise are left untouched and reported with a warning.
 * - Delete requests are only sent when `execute` is set; the `removed` counters are
 *   updated either way, so a dry run reports what would be removed.
 *
 * @param s3 client used for the list and delete requests
 * @param prefix key prefix to sweep
 * @param stats counters to update in place
 * @param judge decides, from the key alone, what to do with a non-fresh object
 */
const sweepPrefix = async (
    s3: S3,
    prefix: string,
    stats: CleanupStats,
    judge: (key: string) => KeyVerdict,
): Promise<void> => {
    const chunkSize = 1000;
    let continuationToken: string | undefined = undefined;
    do {
        console.log('Making a request');
        stats.awsRequests++;
        const page: ListObjectsV2Output = await s3.listObjectsV2({
            Prefix: prefix,
            MaxKeys: chunkSize,
            ContinuationToken: continuationToken,
            ...awsParams,
        });

        const freshAfter = Date.now() - FRESH_CUTOFF;
        const toRemove: ObjectIdentifier[] = [];
        for (const object of page.Contents ?? []) {
            stats.overall++;
            if (object.LastModified!.getTime() > freshAfter) {
                stats.fresh++;
                continue;
            }
            const key = object.Key!;
            const verdict = judge(key);
            if (verdict.action === 'unrecognised') {
                // never delete something whose key cannot be interpreted
                console.warn(`Skipping unrecognised key: ${key}`);
                continue;
            }
            if (verdict.action === 'keep') {
                continue;
            }
            console.log(verdict.reason === undefined
                ? `REMOVING: ${key}`
                : `REMOVING: ${key} (${verdict.reason})`);
            toRemove.push({ Key: key });
            stats.removed += 1;
            stats.removedSize += object.Size ?? 0;
        }

        if (execute && toRemove.length) {
            console.log('--- Removal request ---');
            stats.awsRequests++;
            const result = await s3.deleteObjects({
                Delete: {
                    Objects: toRemove,
                },
                ...awsParams,
            });
            // a delete request can succeed as a whole while individual keys fail
            for (const failure of result.Errors ?? []) {
                console.error(`FAILED TO REMOVE: ${failure.Key} (${failure.Code}: ${failure.Message})`);
            }
        }

        // The page size cannot signal the end of the listing: a final page that happens
        // to be full carries no continuation token (sending none would restart the
        // listing from the beginning), and a page may be short while more keys remain.
        continuationToken = page.IsTruncated ? page.NextContinuationToken : undefined;
    } while (continuationToken);
};

/**
 * Removes S3 objects under `images/` and `card/` that the database no longer references.
 *
 * First collects the ids that must stay (avatars, custom flags, source images,
 * term images, profile cards) and sanity-checks each set with `validateIds`,
 * which aborts the run if a set is suspiciously small or malformed. Then sweeps
 * both prefixes and prints a summary. Nothing is deleted unless `execute` is set.
 *
 * @throws Error when an avatar or card URL in the database has an unexpected format,
 *   or when `validateIds` rejects one of the collected id sets
 */
async function cleanup(): Promise<void> {
    console.log('--- Fetching ids expected to stay ---');
    const db = await dbConnection();

    const avatars: Record<string, true> = {};
    for (const row of await db.all(`
        SELECT avatarSource
        FROM users
        WHERE avatarSource LIKE 'https://%/images/%'
    `)) {
        const match = row.avatarSource.match(/https:\/\/[^/]+\/images\/(.*)-(?:thumb|avatar).png/);
        if (!match) {
            // abort rather than skip: an id missing from the keep list would get its files removed
            throw new Error(`Unexpected avatarSource format: ${row.avatarSource}`);
        }
        avatars[match[1]] = true;
    }

    const flags: Record<string, true> = {};
    for (const row of await db.all(`
        SELECT customFlags
        FROM profiles
        WHERE customFlags != '[]'
    `)) {
        for (const { value } of JSON.parse(row.customFlags)) {
            flags[value] = true;
        }
    }

    const sources: Record<string, true> = {};
    for (const row of await db.all(`
        SELECT images
        FROM sources
        WHERE images is not null AND images != '' AND deleted = 0
    `)) {
        for (const key of row.images.split(',')) {
            sources[key] = true;
        }
    }

    const terms: Record<string, true> = {};
    for (const row of await db.all(`
        SELECT images
        FROM terms
        WHERE images is not null AND images != ''
    `)) {
        for (const key of row.images.split(',')) {
            terms[key] = true;
        }
    }

    const cards: Record<string, true> = {};
    for (const row of await db.all<Pick<Profile, 'card'>[]>(`
        SELECT card
        FROM profiles
        WHERE card is not null AND card != ''
    `)) {
        // capture only the trailing id, so the key matches what the S3 sweep looks up
        const match = row.card?.match(
            /https:\/\/pronouns-page.s3.eu-west-1.amazonaws.com\/card\/[^/]+\/.+-([^-]+)\.png/,
        );
        if (!match) {
            throw new Error(`Unexpected card format: ${row.card}`);
        }
        cards[match[1]] = true;
    }
    for (const row of await db.all(`
        SELECT cardDark
        FROM profiles
        WHERE cardDark is not null AND cardDark != ''
    `)) {
        const m = row.cardDark.match(
            /https:\/\/pronouns-page.s3.eu-west-1.amazonaws.com\/card\/[^/]+\/.+-([^-]+)-dark\.png/,
        );
        if (!m) {
            console.error(row.cardDark);
            continue;
        }
        cards[m[1]] = true;
    }

    validateIds('Avatars', avatars, 500_000);
    validateIds('Flags', flags, 500_000);
    validateIds('Sources', sources, 200);
    validateIds('Terms', terms, 4_000);
    validateIds('Cards', cards, 50_000);

    const s3 = new S3(awsConfig);
    const stats: CleanupStats = { overall: 0, fresh: 0, removed: 0, removedSize: 0, awsRequests: 0 };

    console.log('--- Cleaning up S3: Images ---');
    await sweepPrefix(s3, 'images/', stats, (key): KeyVerdict => {
        const match = key.match(/images\/(.*)-(.*).png/);
        if (!match) {
            return { action: 'unrecognised' };
        }
        const [, id, size] = match;

        // which size suffixes are legitimate depends on what kind of record owns the id
        let owner: string;
        let allowedSizes: string[];
        if (avatars[id]) {
            owner = 'avatar';
            allowedSizes = ['thumb', 'avatar'];
        } else if (flags[id]) {
            owner = 'flag';
            allowedSizes = ['flag'];
        } else if (sources[id]) {
            owner = 'source';
            allowedSizes = ['big', 'thumb'];
        } else if (terms[id]) {
            owner = 'term';
            allowedSizes = ['flag'];
        } else {
            return { action: 'remove', reason: 'not used' };
        }
        return allowedSizes.includes(size)
            ? { action: 'keep' }
            : { action: 'remove', reason: owner };
    });

    console.log('--- Cleaning up S3: Cards ---');
    await sweepPrefix(s3, 'card/', stats, (key): KeyVerdict => {
        const match = key.endsWith('-dark.png')
            ? key.match(/card\/[^/]+\/.+-([^-]+)-dark\.png/)
            : key.match(/card\/[^/]+\/.+-([^-]+)\.png/);
        if (!match) {
            return { action: 'unrecognised' };
        }
        return cards[match[1]] ? { action: 'keep' } : { action: 'remove' };
    });

    console.log('--- Summary ---');
    console.log(`Overall: ${stats.overall}`);
    console.log(`Fresh: ${stats.fresh}`);
    console.log(`Removed: ${stats.removed}`);
    console.log(`Removed size: ${Math.round(stats.removedSize / 1024 / 1024)} MB`);
    console.log(`AWS requests: ${stats.awsRequests}`);
}
// Script entry point: start the cleanup. The returned promise is deliberately not awaited here.
void cleanup();