Fix duplicate ledger fees: explicit idempotency + auto-dedupe

The unique expense index can silently fail to build over pre-existing
duplicate data, so re-syncs were re-adding ledger fees every run.

- syncLedgerEntries now explicitly checks existing references (and
  de-dupes within the batch) instead of trusting the unique index
- dedupeLedgerExpenses keeps one row per etsy-ledger-<entry_id> and
  deletes the rest; runs automatically at the start of each sync so
  existing duplicates self-heal. Distinct entries sharing a date/amount
  are untouched (each has its own reference).
- Sync response/toast report deduped and already-imported counts

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
dlawler489 2026-06-13 17:23:01 +10:00
parent 8e6680f2de
commit a789f01bb4
3 changed files with 54 additions and 19 deletions

View file

@ -127,10 +127,13 @@ const Settings = () => {
setUnknownDebits([]);
try {
const res = await api.post('/etsy/sync');
const { created, updated, unmatchedItems: unmatched, receiptsSeen, ledger, legacyEtsyExpenses: legacy } = res.data;
const { created, updated, unmatchedItems: unmatched, receiptsSeen, ledger, legacyEtsyExpenses: legacy, dedupedFees } = res.data;
toast.success(`Synced ${receiptsSeen} Etsy orders: ${created} new, ${updated} updated`);
if (dedupedFees > 0) {
toast.success(`Cleaned up ${dedupedFees} duplicate fee expense(s) from earlier syncs`);
}
if (ledger) {
toast.success(`Fees from Etsy ledger: ${ledger.feesCreated} new expense(s) imported`);
toast.success(`Fees from Etsy ledger: ${ledger.feesCreated} new, ${ledger.feesDuplicate} already imported`);
setUnknownDebits(ledger.unknownDebits || []);
}
setLegacyEtsyExpenses(legacy || 0);

View file

@ -11,6 +11,7 @@ import {
generatePkce,
syncReceipts,
syncLedgerEntries,
dedupeLedgerExpenses,
} from '../services/etsyApi';
const router = Router();
@ -218,7 +219,9 @@ router.post('/sync', authenticate, async (req: AuthRequest, res: Response) => {
const creds = toCredentials(config);
const result = await syncReceipts(creds, connection);
// Also pull the payment-account ledger into expenses (exact fees from Etsy)
// Clean up any duplicate ledger expenses from earlier syncs, then pull the
// payment-account ledger into expenses (exact fees from Etsy)
const deduped = await dedupeLedgerExpenses(req.userId);
const ledger = await syncLedgerEntries(creds, connection);
// Warn if pre-ledger CSV-imported Etsy fees still exist (would double-count)
@ -228,7 +231,7 @@ router.post('/sync', authenticate, async (req: AuthRequest, res: Response) => {
reference: { $not: /^etsy-ledger-/ },
});
res.json({ ...result, ledger, legacyEtsyExpenses });
res.json({ ...result, ledger, legacyEtsyExpenses, dedupedFees: deduped });
} catch (err: any) {
console.error('Etsy sync failed:', err);
res.status(500).json({ message: err.message || 'Etsy sync failed' });

View file

@ -430,8 +430,8 @@ export const syncLedgerEntries = async (
date: when,
taxDeductible: true,
vendor: 'Etsy',
// Stable per-entry reference makes re-syncs idempotent via the unique
// {reference, vendor, amount, date} index
// Stable per-entry reference (one per Etsy ledger entry) is what makes
// re-syncs idempotent — checked explicitly below before inserting
reference: `etsy-ledger-${entry.entry_id}`,
notes: entry.reference_id ? `Etsy ${entry.reference_type || 'ref'} ${entry.reference_id}` : undefined,
userId: connection.userId,
@ -446,22 +446,51 @@ export const syncLedgerEntries = async (
windowStart = windowEnd;
}
// Idempotent insert: the unique index rejects entries already imported
// Idempotent insert: explicitly skip ledger entries already imported, rather
// than relying on a unique index (which may not be enforced if it failed to
// build over pre-existing duplicate data). Also de-dupes within this batch.
if (toCreate.length > 0) {
try {
const inserted = await Expense.insertMany(toCreate, { ordered: false, rawResult: true } as any);
result.feesCreated = (inserted as any).insertedCount ?? toCreate.length;
} catch (bulkError: any) {
if (bulkError.writeErrors) {
for (const e of bulkError.writeErrors) {
if (e.code === 11000) result.feesDuplicate++;
}
result.feesCreated = bulkError.result?.insertedCount ?? (toCreate.length - bulkError.writeErrors.length);
} else {
throw bulkError;
}
const refs = toCreate.map(t => t.reference);
const existing = await Expense.find(
{ userId: connection.userId, reference: { $in: refs } },
'reference'
).lean();
const seen = new Set(existing.map((e: any) => e.reference));
const fresh = toCreate.filter(t => {
if (seen.has(t.reference)) return false;
seen.add(t.reference);
return true;
});
result.feesDuplicate = toCreate.length - fresh.length;
if (fresh.length > 0) {
const inserted = await Expense.insertMany(fresh, { ordered: false });
result.feesCreated = inserted.length;
}
}
return result;
};
/**
 * Collapse duplicated Etsy ledger expenses for a single user.
 *
 * Rows are grouped by their `reference` (only references starting with
 * `etsy-ledger-` and vendor `Etsy` are considered). Within each group the
 * first row in ascending `dateCreated` order is kept and every later row is
 * deleted. Rows with different references are never compared with each
 * other, so two distinct entries that happen to share a date or amount are
 * left alone.
 *
 * @param userId Owner of the expenses to scan; also applied to the delete filter.
 * @returns Number of rows removed (0 when nothing was duplicated).
 */
export const dedupeLedgerExpenses = async (userId: any): Promise<number> => {
  const rows = (await Expense.find({
    userId,
    vendor: 'Etsy',
    reference: { $regex: /^etsy-ledger-/ },
  })
    .select('_id reference')
    .sort({ dateCreated: 1 })
    .lean()) as any[];

  // First occurrence of a reference is remembered and kept; any repeat is surplus.
  const keptReferences = new Set<string>();
  const surplusIds: any[] = rows
    .filter((row) => {
      if (!keptReferences.has(row.reference)) {
        keptReferences.add(row.reference);
        return false;
      }
      return true;
    })
    .map((row) => row._id);

  // Nothing duplicated: skip the delete round-trip entirely.
  if (surplusIds.length === 0) return 0;

  const { deletedCount } = await Expense.deleteMany({ _id: { $in: surplusIds }, userId });
  return deletedCount || 0;
};