From 550bea258233ce0099082995ef7db84cc1c4bc3b Mon Sep 17 00:00:00 2001 From: Avinash Gusain Date: Mon, 29 Sep 2025 13:59:36 +0530 Subject: [PATCH 01/13] fix:script for cleanup stale entries --- SPAM_FILTER_USAGE.md | 76 +++++++++++ appinfo/info.xml | 1 + .../FilterLegitimateDomainsFromSpamReport.php | 123 ++++++++++++++++++ lib/Service/RecoveryEmailService.php | 75 +++++++++++ 4 files changed, 275 insertions(+) create mode 100644 SPAM_FILTER_USAGE.md create mode 100644 lib/Command/FilterLegitimateDomainsFromSpamReport.php diff --git a/SPAM_FILTER_USAGE.md b/SPAM_FILTER_USAGE.md new file mode 100644 index 0000000..db04db8 --- /dev/null +++ b/SPAM_FILTER_USAGE.md @@ -0,0 +1,76 @@ +# Spam Account Filter Command + +This document explains how to use the new `FilterLegitimateDomainsFromSpamReport` command to filter out legitimate domains from spam account reports. + +## Overview + +The command processes a spam account report file and removes entries that belong to popular (legitimate) email domains, creating a new file with only true spam entries. + +## Usage + +```bash +occ email-recovery:filter-legitimate-domains [options] +``` + +### Arguments + +- `input-file`: Path to the spam account report file (e.g., `spam-account-report-2025-07-23.txt`) +- `output-file`: Path where the filtered output file will be created + +### Options + +- `--include-recovery-email`: Include recovery email addresses in the output file (format: `userid,recovery-email`) + +## Examples + +### Basic Usage +```bash +occ email-recovery:filter-legitimate-domains spam-account-report-2025-07-23.txt filtered-spam-report.txt +``` + +### Include Recovery Email Addresses +```bash +occ email-recovery:filter-legitimate-domains spam-account-report-2025-07-23.txt filtered-spam-report.txt --include-recovery-email +``` + +## How It Works + +1. **Reads the input file**: Parses each line as a user ID from the spam report +2. **Gets recovery email**: Retrieves the recovery email address for each user +3. 
**Validates recovery email**: Uses the same validation logic as `getAllSpamEmails()` in `RecoveryEmailService` +4. **Filters entries**: Keeps only entries that fail validation (true spam) and filters out legitimate domains +5. **Creates output file**: Writes the filtered results to the specified output file + +The filtering uses the exact same logic as the spam detection system, ensuring consistency and accuracy. + +## Output Format + +### Without --include-recovery-email +``` +user1 +user2 +user3 +``` + +### With --include-recovery-email +``` +user1,spam@example.com +user2,fake@disposable.com +user3,temp@throwaway.net +``` + +## Summary Information + +The command provides a summary showing: +- Total entries processed +- Legitimate domains (filtered out) +- Spam entries (kept) +- Invalid entries (skipped) + +## Notes + +- The command automatically skips users who don't exist or have no recovery email +- Uses the same validation logic as the main spam detection system for consistency +- Subscriptions are checked - users with active subscriptions are treated as legitimate +- Errors are logged but don't stop the processing of other entries +- The command is safe to run multiple times on the same input file diff --git a/appinfo/info.xml b/appinfo/info.xml index e24e90e..ca659ca 100644 --- a/appinfo/info.xml +++ b/appinfo/info.xml @@ -23,5 +23,6 @@ OCA\EmailRecovery\Command\SpamAccountDetection OCA\EmailRecovery\Command\ResetDisposableDomainsList OCA\EmailRecovery\Command\RecoveryWarningNotificationCommand + OCA\EmailRecovery\Command\FilterLegitimateDomainsFromSpamReport diff --git a/lib/Command/FilterLegitimateDomainsFromSpamReport.php b/lib/Command/FilterLegitimateDomainsFromSpamReport.php new file mode 100644 index 0000000..ea6b44b --- /dev/null +++ b/lib/Command/FilterLegitimateDomainsFromSpamReport.php @@ -0,0 +1,123 @@ +recoveryEmailService = $recoveryEmailService; + $this->logger = $logger; + } + + protected function configure() { + $this + 
->setName(Application::APP_ID . ':filter-legitimate-domains') + ->setDescription('Filter out legitimate domains from spam account report') + ->addArgument('input-file', InputArgument::REQUIRED, 'Path to the spam account report file') + ->addArgument('output-file', InputArgument::REQUIRED, 'Path to the filtered output file') + ->addOption('include-recovery-email', null, InputOption::VALUE_NONE, 'Include recovery email addresses in output'); + } + + protected function execute(InputInterface $input, OutputInterface $output): int { + $inputFile = $input->getArgument('input-file'); + $outputFile = $input->getArgument('output-file'); + $includeRecoveryEmail = $input->getOption('include-recovery-email'); + + try { + // Check if input file exists + if (!file_exists($inputFile)) { + $output->writeln('Input file does not exist: ' . $inputFile . ''); + return Command::FAILURE; + } + + $output->writeln('Reading spam account report from: ' . $inputFile . ''); + + // Read the input file + $lines = file($inputFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); + if ($lines === false) { + $output->writeln('Failed to read input file'); + return Command::FAILURE; + } + + // Filter out header lines and empty lines + $spamUserIds = array_filter($lines, function($line) { + $line = trim($line); + return !empty($line) && $line !== 'Spam user list:'; + }); + + $totalEntries = count($spamUserIds); + $legitimateCount = 0; + $spamCount = 0; + $filteredEntries = []; + + $output->writeln('Processing ' . $totalEntries . 
' entries...'); + + // Use the service method to filter legitimate domains + $this->recoveryEmailService->filterLegitimateDomainsFromSpamUsers( + $spamUserIds, + // Callback for spam entries (keep them) + function (string $userId, string $recoveryEmail) use (&$spamCount, &$filteredEntries, $includeRecoveryEmail, $output) { + $spamCount++; + if ($includeRecoveryEmail) { + $filteredEntries[] = "$userId,$recoveryEmail"; + } else { + $filteredEntries[] = $userId; + } + $output->writeln("Keeping spam entry: $userId -> $recoveryEmail"); + }, + // Callback for legitimate entries (filter them out) + function (string $userId, string $recoveryEmail) use (&$legitimateCount, $output) { + $legitimateCount++; + $output->writeln("Skipping legitimate domain: $userId -> $recoveryEmail"); + } + ); + + // Display summary + $output->writeln(''); + $output->writeln('=== Filtering Summary ==='); + $output->writeln('Total entries processed: ' . $totalEntries); + $output->writeln('Legitimate domains (filtered out): ' . $legitimateCount); + $output->writeln('Spam entries (kept): ' . $spamCount); + $output->writeln('Invalid entries (skipped): ' . ($totalEntries - $legitimateCount - $spamCount)); + + // Write filtered results to output file + if (!empty($filteredEntries)) { + $output->writeln('Writing filtered results to: ' . $outputFile . ''); + + $outputContent = implode("\n", $filteredEntries) . "\n"; + if (file_put_contents($outputFile, $outputContent) === false) { + $output->writeln('Failed to write output file'); + return Command::FAILURE; + } + + $output->writeln('Successfully created filtered report with ' . count($filteredEntries) . ' entries'); + } else { + $output->writeln('No spam entries to write (all were legitimate domains)'); + } + + return Command::SUCCESS; + + } catch (\Throwable $e) { + $this->logger->error('Error while filtering spam report: ' . $e->getMessage()); + $output->writeln('Error: ' . $e->getMessage() . 
''); + return Command::FAILURE; + } + } +} diff --git a/lib/Service/RecoveryEmailService.php b/lib/Service/RecoveryEmailService.php index 69a7e37..33437df 100644 --- a/lib/Service/RecoveryEmailService.php +++ b/lib/Service/RecoveryEmailService.php @@ -648,6 +648,81 @@ class RecoveryEmailService { } } } + + /** + * Filters legitimate domains from a list of user IDs that were flagged as spam. + * + * This method takes a list of user IDs from a spam report and filters out those + * that belong to legitimate (popular) domains, returning only true spam entries. + * + * @param array $spamUserIds Array of user IDs from spam report + * @param callable $onSpamDetected Callback function with signature fn(string $userId, string $recoveryEmail): void + * @param callable $onLegitimateDetected Callback function with signature fn(string $userId, string $recoveryEmail): void + * @return void + */ + public function filterLegitimateDomainsFromSpamUsers(array $spamUserIds, callable $onSpamDetected, callable $onLegitimateDetected = null): void { + foreach ($spamUserIds as $userId) { + $userId = strtolower(trim($userId)); + + if ($userId === '') { + continue; + } + + $user = $this->userManager->get($userId); + if ($user === null) { + $this->logger->info("User not found: $userId"); + continue; + } + + $email = $user->getEMailAddress(); + if (empty($email)) { + $this->logger->info("No email address found for user: $userId"); + continue; + } + + // Get recovery email for this user + $recoveryEmail = $this->getRecoveryEmail($userId); + if (empty($recoveryEmail)) { + $this->logger->info("No recovery email found for user: $userId"); + continue; + } + + try { + // Check if user has active subscription (skip if they do) + if ($this->hasActiveSubscription($email)) { + $this->logger->info("User $userId has an active subscription. 
Skipping spam flag for <$recoveryEmail>."); + if ($onLegitimateDetected) { + $onLegitimateDetected($userId, $recoveryEmail); + } + continue; + } + } catch (\Throwable $e) { + $this->logger->error("Error checking subscription for $userId <$email>: " . $e->getMessage()); + continue; + } + + try { + // Use the same validation logic as getAllSpamEmails + if (!$this->validateRecoveryEmail($recoveryEmail, $userId)) { + // This is actually legitimate (validation passed), so it's not spam + if ($onLegitimateDetected) { + $onLegitimateDetected($userId, $recoveryEmail); + } + } + } catch (BlacklistedEmailException | InvalidRecoveryEmailException $e) { + // This is indeed spam - validation failed + $this->logger->info("Validation failed (spam) for $userId <$recoveryEmail>: " . $e->getMessage()); + $onSpamDetected($userId, $recoveryEmail); + } catch (\Throwable $e) { + // For other errors, we'll treat as legitimate to be safe + $this->logger->info("Error while checking $userId <$recoveryEmail>: " . 
$e->getMessage()); + if ($onLegitimateDetected) { + $onLegitimateDetected($userId, $recoveryEmail); + } + } + } + } + /** Recovery email reminder start date **/ public function getRecoveryEmailReminderStartDate(string $uid): ?string { return $this->config->getUserValue($uid, $this->appName, self::RECOVERY_EMAIL_REMINDER_START_DATE, null); -- GitLab From 6d6bdf03e31dae1f7ba61540c7d65da86881311d Mon Sep 17 00:00:00 2001 From: Avinash Gusain Date: Mon, 29 Sep 2025 14:12:22 +0530 Subject: [PATCH 02/13] fix lint php --- lib/Command/FilterLegitimateDomainsFromSpamReport.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Command/FilterLegitimateDomainsFromSpamReport.php b/lib/Command/FilterLegitimateDomainsFromSpamReport.php index ea6b44b..4da758f 100644 --- a/lib/Command/FilterLegitimateDomainsFromSpamReport.php +++ b/lib/Command/FilterLegitimateDomainsFromSpamReport.php @@ -57,7 +57,7 @@ class FilterLegitimateDomainsFromSpamReport extends Command { } // Filter out header lines and empty lines - $spamUserIds = array_filter($lines, function($line) { + $spamUserIds = array_filter($lines, function ($line) { $line = trim($line); return !empty($line) && $line !== 'Spam user list:'; }); @@ -113,7 +113,6 @@ class FilterLegitimateDomainsFromSpamReport extends Command { } return Command::SUCCESS; - } catch (\Throwable $e) { $this->logger->error('Error while filtering spam report: ' . $e->getMessage()); $output->writeln('Error: ' . $e->getMessage() . 
''); -- GitLab From 639ff93b62e61c293daca7b64fe87d7fbefe530d Mon Sep 17 00:00:00 2001 From: Avinash Gusain Date: Mon, 29 Sep 2025 15:57:23 +0530 Subject: [PATCH 03/13] remove spam filter file --- SPAM_FILTER_USAGE.md | 76 -------------------------------------------- 1 file changed, 76 deletions(-) delete mode 100644 SPAM_FILTER_USAGE.md diff --git a/SPAM_FILTER_USAGE.md b/SPAM_FILTER_USAGE.md deleted file mode 100644 index db04db8..0000000 --- a/SPAM_FILTER_USAGE.md +++ /dev/null @@ -1,76 +0,0 @@ -# Spam Account Filter Command - -This document explains how to use the new `FilterLegitimateDomainsFromSpamReport` command to filter out legitimate domains from spam account reports. - -## Overview - -The command processes a spam account report file and removes entries that belong to popular (legitimate) email domains, creating a new file with only true spam entries. - -## Usage - -```bash -occ email-recovery:filter-legitimate-domains [options] -``` - -### Arguments - -- `input-file`: Path to the spam account report file (e.g., `spam-account-report-2025-07-23.txt`) -- `output-file`: Path where the filtered output file will be created - -### Options - -- `--include-recovery-email`: Include recovery email addresses in the output file (format: `userid,recovery-email`) - -## Examples - -### Basic Usage -```bash -occ email-recovery:filter-legitimate-domains spam-account-report-2025-07-23.txt filtered-spam-report.txt -``` - -### Include Recovery Email Addresses -```bash -occ email-recovery:filter-legitimate-domains spam-account-report-2025-07-23.txt filtered-spam-report.txt --include-recovery-email -``` - -## How It Works - -1. **Reads the input file**: Parses each line as a user ID from the spam report -2. **Gets recovery email**: Retrieves the recovery email address for each user -3. **Validates recovery email**: Uses the same validation logic as `getAllSpamEmails()` in `RecoveryEmailService` -4. 
**Filters entries**: Keeps only entries that fail validation (true spam) and filters out legitimate domains -5. **Creates output file**: Writes the filtered results to the specified output file - -The filtering uses the exact same logic as the spam detection system, ensuring consistency and accuracy. - -## Output Format - -### Without --include-recovery-email -``` -user1 -user2 -user3 -``` - -### With --include-recovery-email -``` -user1,spam@example.com -user2,fake@disposable.com -user3,temp@throwaway.net -``` - -## Summary Information - -The command provides a summary showing: -- Total entries processed -- Legitimate domains (filtered out) -- Spam entries (kept) -- Invalid entries (skipped) - -## Notes - -- The command automatically skips users who don't exist or have no recovery email -- Uses the same validation logic as the main spam detection system for consistency -- Subscriptions are checked - users with active subscriptions are treated as legitimate -- Errors are logged but don't stop the processing of other entries -- The command is safe to run multiple times on the same input file -- GitLab From d4bfa149a95f517837c9bcfcf7f27349da09f12d Mon Sep 17 00:00:00 2001 From: AVINASH GUSAIN Date: Tue, 30 Sep 2025 13:23:22 +0530 Subject: [PATCH 04/13] Apply 1 suggestion(s) to 1 file(s) Co-authored-by: Ronak Patel --- lib/Command/FilterLegitimateDomainsFromSpamReport.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Command/FilterLegitimateDomainsFromSpamReport.php b/lib/Command/FilterLegitimateDomainsFromSpamReport.php index 4da758f..3b8c6be 100644 --- a/lib/Command/FilterLegitimateDomainsFromSpamReport.php +++ b/lib/Command/FilterLegitimateDomainsFromSpamReport.php @@ -30,7 +30,7 @@ class FilterLegitimateDomainsFromSpamReport extends Command { $this ->setName(Application::APP_ID . 
':filter-legitimate-domains') ->setDescription('Filter out legitimate domains from spam account report') - ->addArgument('input-file', InputArgument::REQUIRED, 'Path to the spam account report file') + ->addArgument('input-file-path', InputArgument::REQUIRED, 'Path to the spam account report file') ->addArgument('output-file', InputArgument::REQUIRED, 'Path to the filtered output file') ->addOption('include-recovery-email', null, InputOption::VALUE_NONE, 'Include recovery email addresses in output'); } -- GitLab From 3e332f9060e74dc6e3e674eba9bff518868d7a18 Mon Sep 17 00:00:00 2001 From: AVINASH GUSAIN Date: Tue, 30 Sep 2025 13:23:34 +0530 Subject: [PATCH 05/13] Apply 1 suggestion(s) to 1 file(s) Co-authored-by: Ronak Patel --- lib/Command/FilterLegitimateDomainsFromSpamReport.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Command/FilterLegitimateDomainsFromSpamReport.php b/lib/Command/FilterLegitimateDomainsFromSpamReport.php index 3b8c6be..5ede3af 100644 --- a/lib/Command/FilterLegitimateDomainsFromSpamReport.php +++ b/lib/Command/FilterLegitimateDomainsFromSpamReport.php @@ -31,7 +31,7 @@ class FilterLegitimateDomainsFromSpamReport extends Command { ->setName(Application::APP_ID . 
':filter-legitimate-domains') ->setDescription('Filter out legitimate domains from spam account report') ->addArgument('input-file-path', InputArgument::REQUIRED, 'Path to the spam account report file') - ->addArgument('output-file', InputArgument::REQUIRED, 'Path to the filtered output file') + ->addArgument('output-file-path', InputArgument::REQUIRED, 'Path to the filtered output file') ->addOption('include-recovery-email', null, InputOption::VALUE_NONE, 'Include recovery email addresses in output'); } -- GitLab From 2aff5a53cf297b8a8fda0cd7c2713c4949650235 Mon Sep 17 00:00:00 2001 From: Avinash Gusain Date: Tue, 30 Sep 2025 20:57:31 +0530 Subject: [PATCH 06/13] fix:code refactoring for execute method --- SPAM_FILTER_USAGE.md | 76 +++++++ .../FilterLegitimateDomainsFromSpamReport.php | 205 ++++++++++++------ 2 files changed, 213 insertions(+), 68 deletions(-) create mode 100644 SPAM_FILTER_USAGE.md diff --git a/SPAM_FILTER_USAGE.md b/SPAM_FILTER_USAGE.md new file mode 100644 index 0000000..db04db8 --- /dev/null +++ b/SPAM_FILTER_USAGE.md @@ -0,0 +1,76 @@ +# Spam Account Filter Command + +This document explains how to use the new `FilterLegitimateDomainsFromSpamReport` command to filter out legitimate domains from spam account reports. + +## Overview + +The command processes a spam account report file and removes entries that belong to popular (legitimate) email domains, creating a new file with only true spam entries. 
+ +## Usage + +```bash +occ email-recovery:filter-legitimate-domains [options] +``` + +### Arguments + +- `input-file`: Path to the spam account report file (e.g., `spam-account-report-2025-07-23.txt`) +- `output-file`: Path where the filtered output file will be created + +### Options + +- `--include-recovery-email`: Include recovery email addresses in the output file (format: `userid,recovery-email`) + +## Examples + +### Basic Usage +```bash +occ email-recovery:filter-legitimate-domains spam-account-report-2025-07-23.txt filtered-spam-report.txt +``` + +### Include Recovery Email Addresses +```bash +occ email-recovery:filter-legitimate-domains spam-account-report-2025-07-23.txt filtered-spam-report.txt --include-recovery-email +``` + +## How It Works + +1. **Reads the input file**: Parses each line as a user ID from the spam report +2. **Gets recovery email**: Retrieves the recovery email address for each user +3. **Validates recovery email**: Uses the same validation logic as `getAllSpamEmails()` in `RecoveryEmailService` +4. **Filters entries**: Keeps only entries that fail validation (true spam) and filters out legitimate domains +5. **Creates output file**: Writes the filtered results to the specified output file + +The filtering uses the exact same logic as the spam detection system, ensuring consistency and accuracy. 
+ +## Output Format + +### Without --include-recovery-email +``` +user1 +user2 +user3 +``` + +### With --include-recovery-email +``` +user1,spam@example.com +user2,fake@disposable.com +user3,temp@throwaway.net +``` + +## Summary Information + +The command provides a summary showing: +- Total entries processed +- Legitimate domains (filtered out) +- Spam entries (kept) +- Invalid entries (skipped) + +## Notes + +- The command automatically skips users who don't exist or have no recovery email +- Uses the same validation logic as the main spam detection system for consistency +- Subscriptions are checked - users with active subscriptions are treated as legitimate +- Errors are logged but don't stop the processing of other entries +- The command is safe to run multiple times on the same input file diff --git a/lib/Command/FilterLegitimateDomainsFromSpamReport.php b/lib/Command/FilterLegitimateDomainsFromSpamReport.php index 5ede3af..bce0170 100644 --- a/lib/Command/FilterLegitimateDomainsFromSpamReport.php +++ b/lib/Command/FilterLegitimateDomainsFromSpamReport.php @@ -16,6 +16,7 @@ use Symfony\Component\Console\Output\OutputInterface; class FilterLegitimateDomainsFromSpamReport extends Command { private RecoveryEmailService $recoveryEmailService; private ILogger $logger; + private array $processingContext = []; public function __construct( RecoveryEmailService $recoveryEmailService, @@ -30,93 +31,161 @@ class FilterLegitimateDomainsFromSpamReport extends Command { $this ->setName(Application::APP_ID . 
':filter-legitimate-domains') ->setDescription('Filter out legitimate domains from spam account report') - ->addArgument('input-file-path', InputArgument::REQUIRED, 'Path to the spam account report file') - ->addArgument('output-file-path', InputArgument::REQUIRED, 'Path to the filtered output file') + ->addArgument('input-file', InputArgument::REQUIRED, 'Path to the spam account report file') + ->addArgument('output-file', InputArgument::REQUIRED, 'Path to the filtered output file') ->addOption('include-recovery-email', null, InputOption::VALUE_NONE, 'Include recovery email addresses in output'); } protected function execute(InputInterface $input, OutputInterface $output): int { - $inputFile = $input->getArgument('input-file'); - $outputFile = $input->getArgument('output-file'); - $includeRecoveryEmail = $input->getOption('include-recovery-email'); - try { - // Check if input file exists - if (!file_exists($inputFile)) { - $output->writeln('Input file does not exist: ' . $inputFile . ''); - return Command::FAILURE; - } - - $output->writeln('Reading spam account report from: ' . $inputFile . ''); + $inputFile = $input->getArgument('input-file'); + $outputFile = $input->getArgument('output-file'); + $includeRecoveryEmail = $input->getOption('include-recovery-email'); - // Read the input file - $lines = file($inputFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); - if ($lines === false) { - $output->writeln('Failed to read input file'); + // Validate and read input file + $spamUserIds = $this->readSpamReportFile($inputFile, $output); + if ($spamUserIds === null) { return Command::FAILURE; } - // Filter out header lines and empty lines - $spamUserIds = array_filter($lines, function ($line) { - $line = trim($line); - return !empty($line) && $line !== 'Spam user list:'; - }); - - $totalEntries = count($spamUserIds); - $legitimateCount = 0; - $spamCount = 0; - $filteredEntries = []; - - $output->writeln('Processing ' . $totalEntries . 
' entries...'); - - // Use the service method to filter legitimate domains - $this->recoveryEmailService->filterLegitimateDomainsFromSpamUsers( - $spamUserIds, - // Callback for spam entries (keep them) - function (string $userId, string $recoveryEmail) use (&$spamCount, &$filteredEntries, $includeRecoveryEmail, $output) { - $spamCount++; - if ($includeRecoveryEmail) { - $filteredEntries[] = "$userId,$recoveryEmail"; - } else { - $filteredEntries[] = $userId; - } - $output->writeln("Keeping spam entry: $userId -> $recoveryEmail"); - }, - // Callback for legitimate entries (filter them out) - function (string $userId, string $recoveryEmail) use (&$legitimateCount, $output) { - $legitimateCount++; - $output->writeln("Skipping legitimate domain: $userId -> $recoveryEmail"); - } - ); + // Process the spam users and filter legitimate domains + $results = $this->processSpamUsers($spamUserIds, $includeRecoveryEmail, $output); // Display summary - $output->writeln(''); - $output->writeln('=== Filtering Summary ==='); - $output->writeln('Total entries processed: ' . $totalEntries); - $output->writeln('Legitimate domains (filtered out): ' . $legitimateCount); - $output->writeln('Spam entries (kept): ' . $spamCount); - $output->writeln('Invalid entries (skipped): ' . ($totalEntries - $legitimateCount - $spamCount)); + $this->displaySummary($results, $output); // Write filtered results to output file - if (!empty($filteredEntries)) { - $output->writeln('Writing filtered results to: ' . $outputFile . ''); - - $outputContent = implode("\n", $filteredEntries) . "\n"; - if (file_put_contents($outputFile, $outputContent) === false) { - $output->writeln('Failed to write output file'); - return Command::FAILURE; - } - - $output->writeln('Successfully created filtered report with ' . count($filteredEntries) . 
' entries'); - } else { - $output->writeln('No spam entries to write (all were legitimate domains)'); - } + $this->writeOutputFile($results['filteredEntries'], $outputFile, $output); return Command::SUCCESS; + } catch (\Throwable $e) { $this->logger->error('Error while filtering spam report: ' . $e->getMessage()); $output->writeln('Error: ' . $e->getMessage() . ''); return Command::FAILURE; } } + + /** + * Validates and reads the spam report input file + */ + private function readSpamReportFile(string $inputFile, OutputInterface $output): ?array { + // Check if input file exists + if (!file_exists($inputFile)) { + $output->writeln('Input file does not exist: ' . $inputFile . ''); + return null; + } + + $output->writeln('Reading spam account report from: ' . $inputFile . ''); + + // Read the input file + $lines = file($inputFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); + if ($lines === false) { + $output->writeln('Failed to read input file'); + return null; + } + + // Filter out header lines and empty lines + $spamUserIds = array_filter($lines, function ($line) { + $line = trim($line); + return !empty($line) && $line !== 'Spam user list:'; + }); + + return array_values($spamUserIds); // Re-index array + } + + /** + * Processes spam users and filters out legitimate domains + */ + private function processSpamUsers(array $spamUserIds, bool $includeRecoveryEmail, OutputInterface $output): array { + $totalEntries = count($spamUserIds); + $legitimateCount = 0; + $spamCount = 0; + $filteredEntries = []; + + $output->writeln('Processing ' . $totalEntries . 
' entries...'); + + // Set the context for the callback methods + $this->processingContext = [ + 'spamCount' => &$spamCount, + 'legitimateCount' => &$legitimateCount, + 'filteredEntries' => &$filteredEntries, + 'includeRecoveryEmail' => $includeRecoveryEmail, + 'output' => $output + ]; + + // Use the service method to filter legitimate domains + $this->recoveryEmailService->filterLegitimateDomainsFromSpamUsers( + $spamUserIds, + // Callback for spam entries (keep them) + [$this, 'onSpamDetected'], + // Callback for legitimate entries (filter them out) + [$this, 'onLegitimateDetected'] + ); + + return [ + 'totalEntries' => $totalEntries, + 'legitimateCount' => $legitimateCount, + 'spamCount' => $spamCount, + 'filteredEntries' => $filteredEntries + ]; + } + + /** + * Callback method for spam entries (keep them) + */ + public function onSpamDetected(string $userId, string $recoveryEmail): void { + $this->processingContext['spamCount']++; + + if ($this->processingContext['includeRecoveryEmail']) { + $this->processingContext['filteredEntries'][] = "$userId,$recoveryEmail"; + } else { + $this->processingContext['filteredEntries'][] = $userId; + } + + $this->processingContext['output']->writeln("Keeping spam entry: $userId -> $recoveryEmail"); + } + + /** + * Callback method for legitimate entries (filter them out) + */ + public function onLegitimateDetected(string $userId, string $recoveryEmail): void { + $this->processingContext['legitimateCount']++; + $this->processingContext['output']->writeln("Skipping legitimate domain: $userId -> $recoveryEmail"); + } + + /** + * Displays the filtering summary + */ + private function displaySummary(array $results, OutputInterface $output): void { + $totalEntries = $results['totalEntries']; + $legitimateCount = $results['legitimateCount']; + $spamCount = $results['spamCount']; + + $output->writeln(''); + $output->writeln('=== Filtering Summary ==='); + $output->writeln('Total entries processed: ' . 
$totalEntries); + $output->writeln('Legitimate domains (filtered out): ' . $legitimateCount); + $output->writeln('Spam entries (kept): ' . $spamCount); + $output->writeln('Invalid entries (skipped): ' . ($totalEntries - $legitimateCount - $spamCount)); + } + + /** + * Writes the filtered results to the output file + */ + private function writeOutputFile(array $filteredEntries, string $outputFile, OutputInterface $output): void { + if (!empty($filteredEntries)) { + $output->writeln('Writing filtered results to: ' . $outputFile . ''); + + $outputContent = implode("\n", $filteredEntries) . "\n"; + if (file_put_contents($outputFile, $outputContent) === false) { + $output->writeln('Failed to write output file'); + throw new \RuntimeException('Failed to write output file'); + } + + $output->writeln('Successfully created filtered report with ' . count($filteredEntries) . ' entries'); + } else { + $output->writeln('No spam entries to write (all were legitimate domains)'); + } + } } -- GitLab From 6e5ad266df8a9d39f7c7ec3393c6dfc911b88311 Mon Sep 17 00:00:00 2001 From: Avinash Gusain Date: Tue, 30 Sep 2025 21:00:10 +0530 Subject: [PATCH 07/13] readme updated --- README.md | 73 +++++++++++++++++++++++++++++++++++++++++- SPAM_FILTER_USAGE.md | 76 -------------------------------------------- 2 files changed, 72 insertions(+), 77 deletions(-) delete mode 100644 SPAM_FILTER_USAGE.md diff --git a/README.md b/README.md index cb8ba04..8a1982c 100644 --- a/README.md +++ b/README.md @@ -143,4 +143,75 @@ occ email-recovery:recovery-warning-notification 2>> /var/log/nextcloud/recovery # Log with timestamps occ email-recovery:recovery-warning-notification 2>&1 | while IFS= read -r line; do echo "$(date '+%Y-%m-%d %H:%M:%S') $line"; done >> /var/log/nextcloud/recovery-timestamped.log -``` \ No newline at end of file +``` + + +# Spam Account Filter Command + +This document explains how to use the new `FilterLegitimateDomainsFromSpamReport` command to filter out legitimate domains from spam 
account reports. + +## Overview + +The command processes a spam account report file and removes entries that belong to popular (legitimate) email domains, creating a new file with only true spam entries. + +## Usage + +```bash +occ email-recovery:filter-legitimate-domains <input-file> <output-file> [options] +``` + +### Arguments + +- `input-file`: Path to the spam account report file (e.g., `spam-account-report-2025-07-23.txt`) +- `output-file`: Path where the filtered output file will be created + +### Options + +- `--include-recovery-email`: Include recovery email addresses in the output file (format: `userid,recovery-email`) + +## Examples + +### Basic Usage +```bash +occ email-recovery:filter-legitimate-domains spam-account-report-2025-07-23.txt filtered-spam-report.txt +``` + +### Include Recovery Email Addresses +```bash +occ email-recovery:filter-legitimate-domains spam-account-report-2025-07-23.txt filtered-spam-report.txt --include-recovery-email +``` + +## How It Works + +1. **Reads the input file**: Parses each line as a user ID from the spam report +2. **Gets recovery email**: Retrieves the recovery email address for each user +3. **Validates recovery email**: Uses the same validation logic as `getAllSpamEmails()` in `RecoveryEmailService` +4. **Filters entries**: Keeps only entries that fail validation (true spam) and filters out legitimate domains +5. **Creates output file**: Writes the filtered results to the specified output file + +The filtering uses the exact same logic as the spam detection system, ensuring consistency and accuracy. 
+ +## Output Format + +### Without --include-recovery-email +``` +user1 +user2 +user3 +``` + +### With --include-recovery-email +``` +user1,spam@example.com +user2,fake@disposable.com +user3,temp@throwaway.net +``` + +## Summary Information + +The command provides a summary showing: +- Total entries processed +- Legitimate domains (filtered out) +- Spam entries (kept) +- Invalid entries (skipped) + diff --git a/SPAM_FILTER_USAGE.md b/SPAM_FILTER_USAGE.md deleted file mode 100644 index db04db8..0000000 --- a/SPAM_FILTER_USAGE.md +++ /dev/null @@ -1,76 +0,0 @@ -# Spam Account Filter Command - -This document explains how to use the new `FilterLegitimateDomainsFromSpamReport` command to filter out legitimate domains from spam account reports. - -## Overview - -The command processes a spam account report file and removes entries that belong to popular (legitimate) email domains, creating a new file with only true spam entries. - -## Usage - -```bash -occ email-recovery:filter-legitimate-domains [options] -``` - -### Arguments - -- `input-file`: Path to the spam account report file (e.g., `spam-account-report-2025-07-23.txt`) -- `output-file`: Path where the filtered output file will be created - -### Options - -- `--include-recovery-email`: Include recovery email addresses in the output file (format: `userid,recovery-email`) - -## Examples - -### Basic Usage -```bash -occ email-recovery:filter-legitimate-domains spam-account-report-2025-07-23.txt filtered-spam-report.txt -``` - -### Include Recovery Email Addresses -```bash -occ email-recovery:filter-legitimate-domains spam-account-report-2025-07-23.txt filtered-spam-report.txt --include-recovery-email -``` - -## How It Works - -1. **Reads the input file**: Parses each line as a user ID from the spam report -2. **Gets recovery email**: Retrieves the recovery email address for each user -3. **Validates recovery email**: Uses the same validation logic as `getAllSpamEmails()` in `RecoveryEmailService` -4. 
**Filters entries**: Keeps only entries that fail validation (true spam) and filters out legitimate domains -5. **Creates output file**: Writes the filtered results to the specified output file - -The filtering uses the exact same logic as the spam detection system, ensuring consistency and accuracy. - -## Output Format - -### Without --include-recovery-email -``` -user1 -user2 -user3 -``` - -### With --include-recovery-email -``` -user1,spam@example.com -user2,fake@disposable.com -user3,temp@throwaway.net -``` - -## Summary Information - -The command provides a summary showing: -- Total entries processed -- Legitimate domains (filtered out) -- Spam entries (kept) -- Invalid entries (skipped) - -## Notes - -- The command automatically skips users who don't exist or have no recovery email -- Uses the same validation logic as the main spam detection system for consistency -- Subscriptions are checked - users with active subscriptions are treated as legitimate -- Errors are logged but don't stop the processing of other entries -- The command is safe to run multiple times on the same input file -- GitLab From 07057894083bf9859b70f30ae1f5a624e788f2bd Mon Sep 17 00:00:00 2001 From: Avinash Gusain Date: Wed, 1 Oct 2025 10:45:01 +0530 Subject: [PATCH 08/13] lint fix --- lib/Command/FilterLegitimateDomainsFromSpamReport.php | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Command/FilterLegitimateDomainsFromSpamReport.php b/lib/Command/FilterLegitimateDomainsFromSpamReport.php index bce0170..5eb0346 100644 --- a/lib/Command/FilterLegitimateDomainsFromSpamReport.php +++ b/lib/Command/FilterLegitimateDomainsFromSpamReport.php @@ -58,7 +58,6 @@ class FilterLegitimateDomainsFromSpamReport extends Command { $this->writeOutputFile($results['filteredEntries'], $outputFile, $output); return Command::SUCCESS; - } catch (\Throwable $e) { $this->logger->error('Error while filtering spam report: ' . $e->getMessage()); $output->writeln('Error: ' . $e->getMessage() . 
''); -- GitLab From df326781870c1242c2d5c1fee64ad4fb8918a848 Mon Sep 17 00:00:00 2001 From: Avinash Gusain Date: Wed, 1 Oct 2025 13:24:49 +0530 Subject: [PATCH 09/13] code refactoring --- .../FilterLegitimateDomainsFromSpamReport.php | 140 +---------------- lib/Service/SpamFilterService.php | 145 ++++++++++++++++++ 2 files changed, 153 insertions(+), 132 deletions(-) create mode 100644 lib/Service/SpamFilterService.php diff --git a/lib/Command/FilterLegitimateDomainsFromSpamReport.php b/lib/Command/FilterLegitimateDomainsFromSpamReport.php index 5eb0346..1970686 100644 --- a/lib/Command/FilterLegitimateDomainsFromSpamReport.php +++ b/lib/Command/FilterLegitimateDomainsFromSpamReport.php @@ -5,7 +5,7 @@ declare(strict_types=1); namespace OCA\EmailRecovery\Command; use OCA\EmailRecovery\AppInfo\Application; -use OCA\EmailRecovery\Service\RecoveryEmailService; +use OCA\EmailRecovery\Service\SpamFilterService; use OCP\ILogger; use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Input\InputArgument; @@ -14,16 +14,15 @@ use Symfony\Component\Console\Input\InputOption; use Symfony\Component\Console\Output\OutputInterface; class FilterLegitimateDomainsFromSpamReport extends Command { - private RecoveryEmailService $recoveryEmailService; + private SpamFilterService $spamFilterService; private ILogger $logger; - private array $processingContext = []; public function __construct( - RecoveryEmailService $recoveryEmailService, + SpamFilterService $spamFilterService, ILogger $logger ) { parent::__construct(); - $this->recoveryEmailService = $recoveryEmailService; + $this->spamFilterService = $spamFilterService; $this->logger = $logger; } @@ -43,19 +42,19 @@ class FilterLegitimateDomainsFromSpamReport extends Command { $includeRecoveryEmail = $input->getOption('include-recovery-email'); // Validate and read input file - $spamUserIds = $this->readSpamReportFile($inputFile, $output); + $spamUserIds = $this->spamFilterService->readSpamReportFile($inputFile, 
$output); if ($spamUserIds === null) { return Command::FAILURE; } // Process the spam users and filter legitimate domains - $results = $this->processSpamUsers($spamUserIds, $includeRecoveryEmail, $output); + $results = $this->spamFilterService->processSpamUsers($spamUserIds, $includeRecoveryEmail, $output); // Display summary - $this->displaySummary($results, $output); + $this->spamFilterService->displaySummary($results, $output); // Write filtered results to output file - $this->writeOutputFile($results['filteredEntries'], $outputFile, $output); + $this->spamFilterService->writeOutputFile($results['filteredEntries'], $outputFile, $output); return Command::SUCCESS; } catch (\Throwable $e) { @@ -64,127 +63,4 @@ class FilterLegitimateDomainsFromSpamReport extends Command { return Command::FAILURE; } } - - /** - * Validates and reads the spam report input file - */ - private function readSpamReportFile(string $inputFile, OutputInterface $output): ?array { - // Check if input file exists - if (!file_exists($inputFile)) { - $output->writeln('Input file does not exist: ' . $inputFile . ''); - return null; - } - - $output->writeln('Reading spam account report from: ' . $inputFile . ''); - - // Read the input file - $lines = file($inputFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); - if ($lines === false) { - $output->writeln('Failed to read input file'); - return null; - } - - // Filter out header lines and empty lines - $spamUserIds = array_filter($lines, function ($line) { - $line = trim($line); - return !empty($line) && $line !== 'Spam user list:'; - }); - - return array_values($spamUserIds); // Re-index array - } - - /** - * Processes spam users and filters out legitimate domains - */ - private function processSpamUsers(array $spamUserIds, bool $includeRecoveryEmail, OutputInterface $output): array { - $totalEntries = count($spamUserIds); - $legitimateCount = 0; - $spamCount = 0; - $filteredEntries = []; - - $output->writeln('Processing ' . $totalEntries . 
' entries...'); - - // Set the context for the callback methods - $this->processingContext = [ - 'spamCount' => &$spamCount, - 'legitimateCount' => &$legitimateCount, - 'filteredEntries' => &$filteredEntries, - 'includeRecoveryEmail' => $includeRecoveryEmail, - 'output' => $output - ]; - - // Use the service method to filter legitimate domains - $this->recoveryEmailService->filterLegitimateDomainsFromSpamUsers( - $spamUserIds, - // Callback for spam entries (keep them) - [$this, 'onSpamDetected'], - // Callback for legitimate entries (filter them out) - [$this, 'onLegitimateDetected'] - ); - - return [ - 'totalEntries' => $totalEntries, - 'legitimateCount' => $legitimateCount, - 'spamCount' => $spamCount, - 'filteredEntries' => $filteredEntries - ]; - } - - /** - * Callback method for spam entries (keep them) - */ - public function onSpamDetected(string $userId, string $recoveryEmail): void { - $this->processingContext['spamCount']++; - - if ($this->processingContext['includeRecoveryEmail']) { - $this->processingContext['filteredEntries'][] = "$userId,$recoveryEmail"; - } else { - $this->processingContext['filteredEntries'][] = $userId; - } - - $this->processingContext['output']->writeln("Keeping spam entry: $userId -> $recoveryEmail"); - } - - /** - * Callback method for legitimate entries (filter them out) - */ - public function onLegitimateDetected(string $userId, string $recoveryEmail): void { - $this->processingContext['legitimateCount']++; - $this->processingContext['output']->writeln("Skipping legitimate domain: $userId -> $recoveryEmail"); - } - - /** - * Displays the filtering summary - */ - private function displaySummary(array $results, OutputInterface $output): void { - $totalEntries = $results['totalEntries']; - $legitimateCount = $results['legitimateCount']; - $spamCount = $results['spamCount']; - - $output->writeln(''); - $output->writeln('=== Filtering Summary ==='); - $output->writeln('Total entries processed: ' . 
$totalEntries); - $output->writeln('Legitimate domains (filtered out): ' . $legitimateCount); - $output->writeln('Spam entries (kept): ' . $spamCount); - $output->writeln('Invalid entries (skipped): ' . ($totalEntries - $legitimateCount - $spamCount)); - } - - /** - * Writes the filtered results to the output file - */ - private function writeOutputFile(array $filteredEntries, string $outputFile, OutputInterface $output): void { - if (!empty($filteredEntries)) { - $output->writeln('Writing filtered results to: ' . $outputFile . ''); - - $outputContent = implode("\n", $filteredEntries) . "\n"; - if (file_put_contents($outputFile, $outputContent) === false) { - $output->writeln('Failed to write output file'); - throw new \RuntimeException('Failed to write output file'); - } - - $output->writeln('Successfully created filtered report with ' . count($filteredEntries) . ' entries'); - } else { - $output->writeln('No spam entries to write (all were legitimate domains)'); - } - } } diff --git a/lib/Service/SpamFilterService.php b/lib/Service/SpamFilterService.php new file mode 100644 index 0000000..d7314da --- /dev/null +++ b/lib/Service/SpamFilterService.php @@ -0,0 +1,145 @@ +recoveryEmailService = $recoveryEmailService; + $this->logger = $logger; + } + + /** + * Validates and reads the spam report input file + */ + public function readSpamReportFile(string $inputFile, OutputInterface $output): ?array { + // Check if input file exists + if (!file_exists($inputFile)) { + $output->writeln('Input file does not exist: ' . $inputFile . ''); + return null; + } + + $output->writeln('Reading spam account report from: ' . $inputFile . 
''); + + // Read the input file + $lines = file($inputFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); + if ($lines === false) { + $output->writeln('Failed to read input file'); + return null; + } + + // Filter out header lines and empty lines + $spamUserIds = array_filter($lines, function ($line) { + $line = trim($line); + return !empty($line) && $line !== 'Spam user list:'; + }); + + return array_values($spamUserIds); // Re-index array + } + + /** + * Processes spam users and filters out legitimate domains + */ + public function processSpamUsers(array $spamUserIds, bool $includeRecoveryEmail, OutputInterface $output): array { + $totalEntries = count($spamUserIds); + $legitimateCount = 0; + $spamCount = 0; + $filteredEntries = []; + + $output->writeln('Processing ' . $totalEntries . ' entries...'); + + // Set the context for the callback methods + $this->processingContext = [ + 'spamCount' => &$spamCount, + 'legitimateCount' => &$legitimateCount, + 'filteredEntries' => &$filteredEntries, + 'includeRecoveryEmail' => $includeRecoveryEmail, + 'output' => $output + ]; + + // Use the service method to filter legitimate domains + $this->recoveryEmailService->filterLegitimateDomainsFromSpamUsers( + $spamUserIds, + // Callback for spam entries (keep them) + [$this, 'onSpamDetected'], + // Callback for legitimate entries (filter them out) + [$this, 'onLegitimateDetected'] + ); + + return [ + 'totalEntries' => $totalEntries, + 'legitimateCount' => $legitimateCount, + 'spamCount' => $spamCount, + 'filteredEntries' => $filteredEntries + ]; + } + + /** + * Displays the filtering summary + */ + public function displaySummary(array $results, OutputInterface $output): void { + $totalEntries = $results['totalEntries']; + $legitimateCount = $results['legitimateCount']; + $spamCount = $results['spamCount']; + + $output->writeln(''); + $output->writeln('=== Filtering Summary ==='); + $output->writeln('Total entries processed: ' . 
$totalEntries); + $output->writeln('Legitimate domains (filtered out): ' . $legitimateCount); + $output->writeln('Spam entries (kept): ' . $spamCount); + $output->writeln('Invalid entries (skipped): ' . ($totalEntries - $legitimateCount - $spamCount)); + } + + /** + * Writes the filtered results to the output file + */ + public function writeOutputFile(array $filteredEntries, string $outputFile, OutputInterface $output): void { + if (!empty($filteredEntries)) { + $output->writeln('Writing filtered results to: ' . $outputFile . ''); + + $outputContent = implode("\n", $filteredEntries) . "\n"; + if (file_put_contents($outputFile, $outputContent) === false) { + $output->writeln('Failed to write output file'); + throw new \RuntimeException('Failed to write output file'); + } + + $output->writeln('Successfully created filtered report with ' . count($filteredEntries) . ' entries'); + } else { + $output->writeln('No spam entries to write (all were legitimate domains)'); + } + } + + /** + * Callback method for spam entries (keep them) + */ + public function onSpamDetected(string $userId, string $recoveryEmail): void { + $this->processingContext['spamCount']++; + + if ($this->processingContext['includeRecoveryEmail']) { + $this->processingContext['filteredEntries'][] = "$userId,$recoveryEmail"; + } else { + $this->processingContext['filteredEntries'][] = $userId; + } + + $this->processingContext['output']->writeln("Keeping spam entry: $userId -> $recoveryEmail"); + } + + /** + * Callback method for legitimate entries (filter them out) + */ + public function onLegitimateDetected(string $userId, string $recoveryEmail): void { + $this->processingContext['legitimateCount']++; + $this->processingContext['output']->writeln("Skipping legitimate domain: $userId -> $recoveryEmail"); + } +} -- GitLab From e88187c41cbb8bfc07d34b17dd43820c5a78b2fb Mon Sep 17 00:00:00 2001 From: Avinash Gusain Date: Tue, 21 Oct 2025 12:30:17 +0530 Subject: [PATCH 10/13] refactor: improve spam filtering 
architecture and separation of concerns - Extract common validation logic into reusable validateUserRecoveryEmail method - Refactor getAllSpamEmails and filterLegitimateDomainsFromSpamUsers to use shared validation - Move displaySummary and writeOutputFile from service to command layer - Change output from file-based to console-based for better flexibility - Improve method visibility by making callback methods private - Add proper invalid entry counting and logging with specific reasons - Use guard clauses for cleaner code structure - Fix validation logic inversion in spam detection - Remove unused code and consolidate duplicate validation logic --- .../FilterLegitimateDomainsFromSpamReport.php | 46 +++++++- lib/Service/RecoveryEmailService.php | 107 ++++++++++-------- lib/Service/SpamFilterService.php | 43 +------ 3 files changed, 105 insertions(+), 91 deletions(-) diff --git a/lib/Command/FilterLegitimateDomainsFromSpamReport.php b/lib/Command/FilterLegitimateDomainsFromSpamReport.php index 1970686..604a9a7 100644 --- a/lib/Command/FilterLegitimateDomainsFromSpamReport.php +++ b/lib/Command/FilterLegitimateDomainsFromSpamReport.php @@ -29,16 +29,14 @@ class FilterLegitimateDomainsFromSpamReport extends Command { protected function configure() { $this ->setName(Application::APP_ID . 
':filter-legitimate-domains') - ->setDescription('Filter out legitimate domains from spam account report') + ->setDescription('Filter out legitimate domains from spam account report and output to console') ->addArgument('input-file', InputArgument::REQUIRED, 'Path to the spam account report file') - ->addArgument('output-file', InputArgument::REQUIRED, 'Path to the filtered output file') ->addOption('include-recovery-email', null, InputOption::VALUE_NONE, 'Include recovery email addresses in output'); } protected function execute(InputInterface $input, OutputInterface $output): int { try { $inputFile = $input->getArgument('input-file'); - $outputFile = $input->getArgument('output-file'); $includeRecoveryEmail = $input->getOption('include-recovery-email'); // Validate and read input file @@ -51,10 +49,10 @@ class FilterLegitimateDomainsFromSpamReport extends Command { $results = $this->spamFilterService->processSpamUsers($spamUserIds, $includeRecoveryEmail, $output); // Display summary - $this->spamFilterService->displaySummary($results, $output); + $this->displaySummary($results, $output); - // Write filtered results to output file - $this->spamFilterService->writeOutputFile($results['filteredEntries'], $outputFile, $output); + // Write filtered results to console + $this->writeResultsToConsole($results['filteredEntries'], $output); return Command::SUCCESS; } catch (\Throwable $e) { @@ -63,4 +61,40 @@ class FilterLegitimateDomainsFromSpamReport extends Command { return Command::FAILURE; } } + + /** + * Displays the filtering summary + */ + private function displaySummary(array $results, OutputInterface $output): void { + $totalEntries = $results['totalEntries']; + $legitimateCount = $results['legitimateCount']; + $spamCount = $results['spamCount']; + + $output->writeln(''); + $output->writeln('=== Filtering Summary ==='); + $output->writeln('Total entries processed: ' . $totalEntries); + $output->writeln('Legitimate domains (filtered out): ' . 
$legitimateCount); + $output->writeln('Spam entries (kept): ' . $spamCount); + $output->writeln('Invalid entries (skipped): ' . ($totalEntries - $legitimateCount - $spamCount)); + } + + /** + * Writes the filtered results to console (stdout) + */ + private function writeResultsToConsole(array $filteredEntries, OutputInterface $output): void { + // no entries to write + if (empty($filteredEntries)) { + $output->writeln('No spam entries to output (all were legitimate domains)'); + return; + } + + $output->writeln('Outputting ' . count($filteredEntries) . ' filtered entries to console...'); + + // Write each entry to console (stdout) + foreach ($filteredEntries as $entry) { + echo $entry . "\n"; + } + + $output->writeln('Successfully output ' . count($filteredEntries) . ' entries to console'); + } } diff --git a/lib/Service/RecoveryEmailService.php b/lib/Service/RecoveryEmailService.php index 3541c6b..96b268b 100644 --- a/lib/Service/RecoveryEmailService.php +++ b/lib/Service/RecoveryEmailService.php @@ -682,6 +682,47 @@ class RecoveryEmailService { return false; } + /** + * Validates a user's recovery email and determines if it's spam. + * + * This is the core validation logic used by both getAllSpamEmails and + * filterLegitimateDomainsFromSpamUsers methods. + * + * @param string $userId The user ID to validate + * @param string $recoveryEmail The recovery email to validate + * @param string $userEmail The user's main email address + * @return array Returns an array with 'isSpam' boolean and 'reason' string + */ + private function validateUserRecoveryEmail(string $userId, string $recoveryEmail, string $userEmail): array { + // Check if user has active subscription (skip if they do) + try { + if ($this->hasActiveSubscription($userEmail)) { + $this->logger->info("User $userId has an active subscription. 
Skipping spam flag for <$recoveryEmail>."); + return ['isSpam' => false, 'reason' => 'active_subscription']; + } + } catch (\Throwable $e) { + $this->logger->error("Error checking subscription for $userId <$userEmail>: " . $e->getMessage()); + return ['isSpam' => false, 'reason' => 'subscription_check_error']; + } + + // Validate the recovery email + try { + if ($this->validateRecoveryEmail($recoveryEmail, $userId)) { + // Validation passed - this is legitimate (not spam) + return ['isSpam' => false, 'reason' => 'validation_passed']; + } else { + // Validation failed - this is spam + return ['isSpam' => true, 'reason' => 'validation_failed']; + } + } catch (BlacklistedEmailException | InvalidRecoveryEmailException $e) { + $this->logger->info("Validation failed (spam) for $userId <$recoveryEmail>: " . $e->getMessage()); + return ['isSpam' => true, 'reason' => 'validation_failed']; + } catch (\Throwable $e) { + $this->logger->info("Error while checking $userId <$recoveryEmail>: " . $e->getMessage()); + return ['isSpam' => false, 'reason' => 'validation_error']; + } + } + /** * Scans all verified recovery email addresses and returns a list of spam accounts. * @@ -721,25 +762,9 @@ class RecoveryEmailService { continue; } - try { - if ($this->hasActiveSubscription($email)) { - $this->logger->info("User $userId has an active subscription. Skipping spam flag for <$recoveryEmail>."); - continue; - } - } catch (\Throwable $e) { - $this->logger->error("Error checking subscription for $userId <$email>: " . $e->getMessage()); - continue; - } - - try { - if (!$this->validateRecoveryEmail($recoveryEmail, $userId)) { - $onSpamDetected($userId, $recoveryEmail); - } - } catch (BlacklistedEmailException | InvalidRecoveryEmailException $e) { - $this->logger->info("Validation failed (spam) for $userId <$recoveryEmail>: " . 
$e->getMessage()); + $validation = $this->validateUserRecoveryEmail($userId, $recoveryEmail, $email); + if ($validation['isSpam']) { $onSpamDetected($userId, $recoveryEmail); - } catch (\Throwable $e) { - $this->logger->info("Error while checking $userId <$recoveryEmail>: " . $e->getMessage()); } } } @@ -753,25 +778,35 @@ class RecoveryEmailService { * @param array $spamUserIds Array of user IDs from spam report * @param callable $onSpamDetected Callback function with signature fn(string $userId, string $recoveryEmail): void * @param callable $onLegitimateDetected Callback function with signature fn(string $userId, string $recoveryEmail): void + * @param callable $onInvalidDetected Optional callback function with signature fn(string $userId, string $reason): void * @return void */ - public function filterLegitimateDomainsFromSpamUsers(array $spamUserIds, callable $onSpamDetected, callable $onLegitimateDetected = null): void { + public function filterLegitimateDomainsFromSpamUsers(array $spamUserIds, callable $onSpamDetected, callable $onLegitimateDetected = null, callable $onInvalidDetected = null): void { foreach ($spamUserIds as $userId) { $userId = strtolower(trim($userId)); if ($userId === '') { + if ($onInvalidDetected) { + $onInvalidDetected($userId, 'empty_user_id'); + } continue; } $user = $this->userManager->get($userId); if ($user === null) { $this->logger->info("User not found: $userId"); + if ($onInvalidDetected) { + $onInvalidDetected($userId, 'user_not_found'); + } continue; } $email = $user->getEMailAddress(); if (empty($email)) { $this->logger->info("No email address found for user: $userId"); + if ($onInvalidDetected) { + $onInvalidDetected($userId, 'no_email_address'); + } continue; } @@ -779,38 +814,18 @@ class RecoveryEmailService { $recoveryEmail = $this->getRecoveryEmail($userId); if (empty($recoveryEmail)) { $this->logger->info("No recovery email found for user: $userId"); - continue; - } - - try { - // Check if user has active subscription 
(skip if they do) - if ($this->hasActiveSubscription($email)) { - $this->logger->info("User $userId has an active subscription. Skipping spam flag for <$recoveryEmail>."); - if ($onLegitimateDetected) { - $onLegitimateDetected($userId, $recoveryEmail); - } - continue; + if ($onInvalidDetected) { + $onInvalidDetected($userId, 'no_recovery_email'); } - } catch (\Throwable $e) { - $this->logger->error("Error checking subscription for $userId <$email>: " . $e->getMessage()); continue; } - try { - // Use the same validation logic as getAllSpamEmails - if (!$this->validateRecoveryEmail($recoveryEmail, $userId)) { - // This is actually legitimate (validation passed), so it's not spam - if ($onLegitimateDetected) { - $onLegitimateDetected($userId, $recoveryEmail); - } - } - } catch (BlacklistedEmailException | InvalidRecoveryEmailException $e) { - // This is indeed spam - validation failed - $this->logger->info("Validation failed (spam) for $userId <$recoveryEmail>: " . $e->getMessage()); + $validation = $this->validateUserRecoveryEmail($userId, $recoveryEmail, $email); + + if ($validation['isSpam']) { $onSpamDetected($userId, $recoveryEmail); - } catch (\Throwable $e) { - // For other errors, we'll treat as legitimate to be safe - $this->logger->info("Error while checking $userId <$recoveryEmail>: " . 
$e->getMessage()); + } else { + // This is legitimate (validation passed or other non-spam reason) if ($onLegitimateDetected) { $onLegitimateDetected($userId, $recoveryEmail); } diff --git a/lib/Service/SpamFilterService.php b/lib/Service/SpamFilterService.php index d7314da..87645d1 100644 --- a/lib/Service/SpamFilterService.php +++ b/lib/Service/SpamFilterService.php @@ -85,45 +85,10 @@ class SpamFilterService { ]; } - /** - * Displays the filtering summary - */ - public function displaySummary(array $results, OutputInterface $output): void { - $totalEntries = $results['totalEntries']; - $legitimateCount = $results['legitimateCount']; - $spamCount = $results['spamCount']; - - $output->writeln(''); - $output->writeln('=== Filtering Summary ==='); - $output->writeln('Total entries processed: ' . $totalEntries); - $output->writeln('Legitimate domains (filtered out): ' . $legitimateCount); - $output->writeln('Spam entries (kept): ' . $spamCount); - $output->writeln('Invalid entries (skipped): ' . ($totalEntries - $legitimateCount - $spamCount)); - } - - /** - * Writes the filtered results to the output file - */ - public function writeOutputFile(array $filteredEntries, string $outputFile, OutputInterface $output): void { - if (!empty($filteredEntries)) { - $output->writeln('Writing filtered results to: ' . $outputFile . ''); - - $outputContent = implode("\n", $filteredEntries) . "\n"; - if (file_put_contents($outputFile, $outputContent) === false) { - $output->writeln('Failed to write output file'); - throw new \RuntimeException('Failed to write output file'); - } - - $output->writeln('Successfully created filtered report with ' . count($filteredEntries) . 
' entries'); - } else { - $output->writeln('No spam entries to write (all were legitimate domains)'); - } - } - /** * Callback method for spam entries (keep them) */ - public function onSpamDetected(string $userId, string $recoveryEmail): void { + private function onSpamDetected(string $userId, string $recoveryEmail): void { $this->processingContext['spamCount']++; if ($this->processingContext['includeRecoveryEmail']) { @@ -132,14 +97,14 @@ class SpamFilterService { $this->processingContext['filteredEntries'][] = $userId; } - $this->processingContext['output']->writeln("Keeping spam entry: $userId -> $recoveryEmail"); + $this->processingContext['output']->writeln("KEEPING SPAM: $userId -> $recoveryEmail"); } /** * Callback method for legitimate entries (filter them out) */ - public function onLegitimateDetected(string $userId, string $recoveryEmail): void { + private function onLegitimateDetected(string $userId, string $recoveryEmail): void { $this->processingContext['legitimateCount']++; - $this->processingContext['output']->writeln("Skipping legitimate domain: $userId -> $recoveryEmail"); + $this->processingContext['output']->writeln("REMOVING LEGITIMATE: $userId -> $recoveryEmail"); } } -- GitLab From 1b71cd743e79190f625dae79d656c7d919d0bf34 Mon Sep 17 00:00:00 2001 From: Avinash Gusain Date: Tue, 21 Oct 2025 12:34:42 +0530 Subject: [PATCH 11/13] fix:php lint --- lib/Service/RecoveryEmailService.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Service/RecoveryEmailService.php b/lib/Service/RecoveryEmailService.php index 96b268b..4266d56 100644 --- a/lib/Service/RecoveryEmailService.php +++ b/lib/Service/RecoveryEmailService.php @@ -684,8 +684,8 @@ class RecoveryEmailService { } /** * Validates a user's recovery email and determines if it's spam. 
- * - * This is the core validation logic used by both getAllSpamEmails and + * + * This is the core validation logic used by both getAllSpamEmails and * filterLegitimateDomainsFromSpamUsers methods. * * @param string $userId The user ID to validate -- GitLab From cfcf2c6075e7c4a79aee1fbd2f026e671499149a Mon Sep 17 00:00:00 2001 From: Avinash Gusain Date: Tue, 21 Oct 2025 13:03:26 +0530 Subject: [PATCH 12/13] make function public --- lib/Service/SpamFilterService.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Service/SpamFilterService.php b/lib/Service/SpamFilterService.php index 87645d1..db2f861 100644 --- a/lib/Service/SpamFilterService.php +++ b/lib/Service/SpamFilterService.php @@ -88,7 +88,7 @@ class SpamFilterService { /** * Callback method for spam entries (keep them) */ - private function onSpamDetected(string $userId, string $recoveryEmail): void { + public function onSpamDetected(string $userId, string $recoveryEmail): void { $this->processingContext['spamCount']++; if ($this->processingContext['includeRecoveryEmail']) { @@ -103,7 +103,7 @@ class SpamFilterService { /** * Callback method for legitimate entries (filter them out) */ - private function onLegitimateDetected(string $userId, string $recoveryEmail): void { + public function onLegitimateDetected(string $userId, string $recoveryEmail): void { $this->processingContext['legitimateCount']++; $this->processingContext['output']->writeln("REMOVING LEGITIMATE: $userId -> $recoveryEmail"); } -- GitLab From 803644d3d85eaa18034f8e37f0e0a990f358f62c Mon Sep 17 00:00:00 2001 From: Avinash Gusain Date: Tue, 21 Oct 2025 14:25:17 +0530 Subject: [PATCH 13/13] put info in logger and keep only spam entry for console --- .../FilterLegitimateDomainsFromSpamReport.php | 46 ++++++++++--------- lib/Service/SpamFilterService.php | 14 +++--- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/lib/Command/FilterLegitimateDomainsFromSpamReport.php 
b/lib/Command/FilterLegitimateDomainsFromSpamReport.php index 604a9a7..cedf1af 100644 --- a/lib/Command/FilterLegitimateDomainsFromSpamReport.php +++ b/lib/Command/FilterLegitimateDomainsFromSpamReport.php @@ -6,7 +6,7 @@ namespace OCA\EmailRecovery\Command; use OCA\EmailRecovery\AppInfo\Application; use OCA\EmailRecovery\Service\SpamFilterService; -use OCP\ILogger; +use Psr\Log\LoggerInterface; use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Input\InputArgument; use Symfony\Component\Console\Input\InputInterface; @@ -15,11 +15,11 @@ use Symfony\Component\Console\Output\OutputInterface; class FilterLegitimateDomainsFromSpamReport extends Command { private SpamFilterService $spamFilterService; - private ILogger $logger; + private LoggerInterface $logger; public function __construct( SpamFilterService $spamFilterService, - ILogger $logger + LoggerInterface $logger ) { parent::__construct(); $this->spamFilterService = $spamFilterService; @@ -31,13 +31,15 @@ class FilterLegitimateDomainsFromSpamReport extends Command { ->setName(Application::APP_ID . 
':filter-legitimate-domains') ->setDescription('Filter out legitimate domains from spam account report and output to console') ->addArgument('input-file', InputArgument::REQUIRED, 'Path to the spam account report file') - ->addOption('include-recovery-email', null, InputOption::VALUE_NONE, 'Include recovery email addresses in output'); + ->addOption('include-recovery-email', null, InputOption::VALUE_NONE, 'Include recovery email addresses in output') + ->addOption('quiet', 'q', InputOption::VALUE_NONE, 'Suppress progress messages and only output filtered data'); } protected function execute(InputInterface $input, OutputInterface $output): int { try { $inputFile = $input->getArgument('input-file'); $includeRecoveryEmail = $input->getOption('include-recovery-email'); + $quiet = $input->getOption('quiet'); // Validate and read input file $spamUserIds = $this->spamFilterService->readSpamReportFile($inputFile, $output); @@ -48,11 +50,11 @@ class FilterLegitimateDomainsFromSpamReport extends Command { // Process the spam users and filter legitimate domains $results = $this->spamFilterService->processSpamUsers($spamUserIds, $includeRecoveryEmail, $output); - // Display summary - $this->displaySummary($results, $output); + // Log summary + $this->logSummary($results); - // Write filtered results to console - $this->writeResultsToConsole($results['filteredEntries'], $output); + // Write filtered results to console (data only) + $this->outputFilteredResults($results['filteredEntries'], $output, $quiet); return Command::SUCCESS; } catch (\Throwable $e) { @@ -63,38 +65,38 @@ class FilterLegitimateDomainsFromSpamReport extends Command { } /** - * Displays the filtering summary + * Logs the filtering summary */ - private function displaySummary(array $results, OutputInterface $output): void { + private function logSummary(array $results): void { $totalEntries = $results['totalEntries']; $legitimateCount = $results['legitimateCount']; $spamCount = $results['spamCount']; + 
$invalidCount = $totalEntries - $legitimateCount - $spamCount; - $output->writeln(''); - $output->writeln('=== Filtering Summary ==='); - $output->writeln('Total entries processed: ' . $totalEntries); - $output->writeln('Legitimate domains (filtered out): ' . $legitimateCount); - $output->writeln('Spam entries (kept): ' . $spamCount); - $output->writeln('Invalid entries (skipped): ' . ($totalEntries - $legitimateCount - $spamCount)); + $this->logger->info('=== Filtering Summary ==='); + $this->logger->info('Total entries processed: ' . $totalEntries); + $this->logger->info('Legitimate domains (filtered out): ' . $legitimateCount); + $this->logger->info('Spam entries (kept): ' . $spamCount); + $this->logger->info('Invalid entries (skipped): ' . $invalidCount); } /** - * Writes the filtered results to console (stdout) + * Outputs the filtered results to console (stdout) and logs status */ - private function writeResultsToConsole(array $filteredEntries, OutputInterface $output): void { + private function outputFilteredResults(array $filteredEntries, OutputInterface $output, bool $quiet = false): void { // no entries to write if (empty($filteredEntries)) { - $output->writeln('No spam entries to output (all were legitimate domains)'); + $this->logger->info('No spam entries to output (all were legitimate domains)'); return; } - $output->writeln('Outputting ' . count($filteredEntries) . ' filtered entries to console...'); + $this->logger->info('Outputting ' . count($filteredEntries) . ' filtered entries to console'); - // Write each entry to console (stdout) + // Write each entry to stdout (pure data only) foreach ($filteredEntries as $entry) { echo $entry . "\n"; } - $output->writeln('Successfully output ' . count($filteredEntries) . ' entries to console'); + $this->logger->info('Successfully output ' . count($filteredEntries) . 
' entries to console'); } } diff --git a/lib/Service/SpamFilterService.php b/lib/Service/SpamFilterService.php index db2f861..06d3538 100644 --- a/lib/Service/SpamFilterService.php +++ b/lib/Service/SpamFilterService.php @@ -4,17 +4,17 @@ declare(strict_types=1); namespace OCA\EmailRecovery\Service; -use OCP\ILogger; +use Psr\Log\LoggerInterface; use Symfony\Component\Console\Output\OutputInterface; class SpamFilterService { private RecoveryEmailService $recoveryEmailService; - private ILogger $logger; + private LoggerInterface $logger; private array $processingContext = []; public function __construct( RecoveryEmailService $recoveryEmailService, - ILogger $logger + LoggerInterface $logger ) { $this->recoveryEmailService = $recoveryEmailService; $this->logger = $logger; @@ -30,7 +30,7 @@ class SpamFilterService { return null; } - $output->writeln('Reading spam account report from: ' . $inputFile . ''); + $this->logger->info('Reading spam account report from: ' . $inputFile); // Read the input file $lines = file($inputFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); @@ -57,7 +57,7 @@ class SpamFilterService { $spamCount = 0; $filteredEntries = []; - $output->writeln('Processing ' . $totalEntries . ' entries...'); + $this->logger->info('Processing ' . $totalEntries . 
' entries...'); // Set the context for the callback methods $this->processingContext = [ @@ -97,7 +97,7 @@ class SpamFilterService { $this->processingContext['filteredEntries'][] = $userId; } - $this->processingContext['output']->writeln("KEEPING SPAM: $userId -> $recoveryEmail"); + $this->logger->info("KEEPING SPAM: $userId -> $recoveryEmail"); } /** @@ -105,6 +105,6 @@ class SpamFilterService { */ public function onLegitimateDetected(string $userId, string $recoveryEmail): void { $this->processingContext['legitimateCount']++; - $this->processingContext['output']->writeln("REMOVING LEGITIMATE: $userId -> $recoveryEmail"); + $this->logger->info("REMOVING LEGITIMATE: $userId -> $recoveryEmail"); } } -- GitLab