Loading src/whotracksme_trackers.js +62 −50 Original line number Diff line number Diff line Loading @@ -2,46 +2,52 @@ const fetcher = require("./fetch_promise.js") const whotracksmeUri = "https://raw.githubusercontent.com/whotracksme/whotracks.me/master/whotracksme/data/assets/trackerdb.sql" /* * createTrackersFromWhoTrackMe(wtmLines) build a list of Trackers objects * from the sql sources of who-tracks-me extracts. * * The data will be extracted from the lines like these: * INSERT INTO trackers VALUES('1000mercis','1000mercis',13,NULL,'1000mercis','2662',NULL,NULL); * INSERT INTO trackers VALUES('161media','Platform161',1,'https://platform161.com/','platform161','730',NULL,NULL); * * * These lines were primarily intended to populate the SQL table defined like this: * CREATE TABLE trackers ( * id TEXT NOT NULL UNIQUE, * name TEXT NOT NULL, * category_id INTEGER, * website_url TEXT, * company_id TEXT, * ghostery_id TEXT, * notes TEXT, * alias TEXT REFERENCES trackers (id), * FOREIGN KEY (category_id) REFERENCES categories (id), * FOREIGN KEY (company_id) REFERENCES companies (id) * ); * * The category_id is an enum defined as follows: * INSERT INTO categories VALUES(1,'advertising'); * INSERT INTO categories VALUES(2,'audio_video_player'); * INSERT INTO categories VALUES(3,'cdn'); * INSERT INTO categories VALUES(4,'comments'); * INSERT INTO categories VALUES(5,'consent'); * INSERT INTO categories VALUES(6,'customer_interaction'); * INSERT INTO categories VALUES(7,'email'); * INSERT INTO categories VALUES(8,'essential'); * INSERT INTO categories VALUES(9,'extensions'); * INSERT INTO categories VALUES(10,'hosting'); * INSERT INTO categories VALUES(11,'misc'); * INSERT INTO categories VALUES(12,'pornvertising'); * INSERT INTO categories VALUES(13,'site_analytics'); * INSERT INTO categories VALUES(14,'social_media'); * INSERT INTO categories VALUES(15,'telemetry'); * * */ function createTrackersFromWhoTrackMe(wtmLines) { const domainsByTrackers = parseDomainTrackers(wtmLines)
// categories enum values // INSERT INTO categories VALUES(1,'advertising'); // INSERT INTO categories VALUES(2,'audio_video_player'); // INSERT INTO categories VALUES(3,'cdn'); // INSERT INTO categories VALUES(4,'comments'); // INSERT INTO categories VALUES(5,'consent'); // INSERT INTO categories VALUES(6,'customer_interaction'); // INSERT INTO categories VALUES(7,'email'); // INSERT INTO categories VALUES(8,'essential'); // INSERT INTO categories VALUES(9,'extensions'); // INSERT INTO categories VALUES(10,'hosting'); // INSERT INTO categories VALUES(11,'misc'); // INSERT INTO categories VALUES(12,'pornvertising'); // INSERT INTO categories VALUES(13,'site_analytics'); // INSERT INTO categories VALUES(14,'social_media'); // INSERT INTO categories VALUES(15,'telemetry'); // SQL Schema // CREATE TABLE trackers ( // id TEXT NOT NULL UNIQUE, // name TEXT NOT NULL, // category_id INTEGER, // website_url TEXT, // company_id TEXT, // ghostery_id TEXT, // notes TEXT, // alias TEXT REFERENCES trackers (id), // FOREIGN KEY (category_id) REFERENCES categories (id), // FOREIGN KEY (company_id) REFERENCES companies (id) // ); // Example of the line we will manually parse: // INSERT INTO trackers VALUES('1000mercis','1000mercis',13,NULL,'1000mercis','2662',NULL,NULL); // INSERT INTO trackers VALUES('161media','Platform161',1,'https://platform161.com/','platform161','730',NULL,NULL); const trackersList = [] wtmLines.forEach(l => { if (l.startsWith("INSERT INTO trackers VALUES(")) { Loading @@ -67,25 +73,31 @@ function createTrackersFromWhoTrackMe(wtmLines) { return trackersList } /* * parseDomainTrackers(wtmLines), build a Map: trackerId -> [domains] * from the sql sources of who-tracks-me extracts. 
* * The data will be extracted from the lines like these: * * INSERT INTO tracker_domains VALUES('1000mercis','mmtro.com',NULL); * INSERT INTO tracker_domains VALUES('161media','creative-serving.com',NULL); * * These lines were primarily intended to populate the SQL table defined like this: * * CREATE TABLE tracker_domains ( * tracker TEXT NOT NULL, * domain TEXT UNIQUE NOT NULL, * notes TEXT, * FOREIGN KEY (tracker) REFERENCES trackers (id) * ); * */ function parseDomainTrackers(wtmLines) { // SQL schema // CREATE TABLE tracker_domains ( // tracker TEXT NOT NULL, // domain TEXT UNIQUE NOT NULL, // notes TEXT, // FOREIGN KEY (tracker) REFERENCES trackers (id) //); // // Example of the line we will manually parse: // INSERT INTO tracker_domains VALUES('1000mercis','mmtro.com',NULL); // INSERT INTO tracker_domains VALUES('161media','creative-serving.com',NULL); const domainsByTrackers = {} wtmLines.forEach(l => { if (l.startsWith("INSERT INTO tracker_domains VALUES(")) { const match = l.split("'") const tracker = match[1] let domains = domainsByTrackers[tracker] if (!domains) { domains = new Set() } Loading Loading
src/whotracksme_trackers.js +62 −50 Original line number Diff line number Diff line Loading @@ -2,46 +2,52 @@ const fetcher = require("./fetch_promise.js") const whotracksmeUri = "https://raw.githubusercontent.com/whotracksme/whotracks.me/master/whotracksme/data/assets/trackerdb.sql" /* * createTrackersFromWhoTrackMe(wtmLines) build a list of Trackers objects * from the sql sources of who-tracks-me extracts. * * The data will be extracted from the lines like these: * INSERT INTO trackers VALUES('1000mercis','1000mercis',13,NULL,'1000mercis','2662',NULL,NULL); * INSERT INTO trackers VALUES('161media','Platform161',1,'https://platform161.com/','platform161','730',NULL,NULL); * * * These lines were primarily intended to populate the SQL table defined like this: * CREATE TABLE trackers ( * id TEXT NOT NULL UNIQUE, * name TEXT NOT NULL, * category_id INTEGER, * website_url TEXT, * company_id TEXT, * ghostery_id TEXT, * notes TEXT, * alias TEXT REFERENCES trackers (id), * FOREIGN KEY (category_id) REFERENCES categories (id), * FOREIGN KEY (company_id) REFERENCES companies (id) * ); * * The category_id is an enum defined as follows: * INSERT INTO categories VALUES(1,'advertising'); * INSERT INTO categories VALUES(2,'audio_video_player'); * INSERT INTO categories VALUES(3,'cdn'); * INSERT INTO categories VALUES(4,'comments'); * INSERT INTO categories VALUES(5,'consent'); * INSERT INTO categories VALUES(6,'customer_interaction'); * INSERT INTO categories VALUES(7,'email'); * INSERT INTO categories VALUES(8,'essential'); * INSERT INTO categories VALUES(9,'extensions'); * INSERT INTO categories VALUES(10,'hosting'); * INSERT INTO categories VALUES(11,'misc'); * INSERT INTO categories VALUES(12,'pornvertising'); * INSERT INTO categories VALUES(13,'site_analytics'); * INSERT INTO categories VALUES(14,'social_media'); * INSERT INTO categories VALUES(15,'telemetry'); * * */ function createTrackersFromWhoTrackMe(wtmLines) { const domainsByTrackers = parseDomainTrackers(wtmLines) //
categories enum values // INSERT INTO categories VALUES(1,'advertising'); // INSERT INTO categories VALUES(2,'audio_video_player'); // INSERT INTO categories VALUES(3,'cdn'); // INSERT INTO categories VALUES(4,'comments'); // INSERT INTO categories VALUES(5,'consent'); // INSERT INTO categories VALUES(6,'customer_interaction'); // INSERT INTO categories VALUES(7,'email'); // INSERT INTO categories VALUES(8,'essential'); // INSERT INTO categories VALUES(9,'extensions'); // INSERT INTO categories VALUES(10,'hosting'); // INSERT INTO categories VALUES(11,'misc'); // INSERT INTO categories VALUES(12,'pornvertising'); // INSERT INTO categories VALUES(13,'site_analytics'); // INSERT INTO categories VALUES(14,'social_media'); // INSERT INTO categories VALUES(15,'telemetry'); // SQL Schema // CREATE TABLE trackers ( // id TEXT NOT NULL UNIQUE, // name TEXT NOT NULL, // category_id INTEGER, // website_url TEXT, // company_id TEXT, // ghostery_id TEXT, // notes TEXT, // alias TEXT REFERENCES trackers (id), // FOREIGN KEY (category_id) REFERENCES categories (id), // FOREIGN KEY (company_id) REFERENCES companies (id) // ); // Example of the line we will manually parse: // INSERT INTO trackers VALUES('1000mercis','1000mercis',13,NULL,'1000mercis','2662',NULL,NULL); // INSERT INTO trackers VALUES('161media','Platform161',1,'https://platform161.com/','platform161','730',NULL,NULL); const trackersList = [] wtmLines.forEach(l => { if (l.startsWith("INSERT INTO trackers VALUES(")) { Loading @@ -67,25 +73,31 @@ function createTrackersFromWhoTrackMe(wtmLines) { return trackersList } /* * parseDomainTrackers(wtmLines), build a Map: trackerId -> [domains] * from the sql sources of who-tracks-me extracts. 
* * The data will be extracted from the lines like these: * * INSERT INTO tracker_domains VALUES('1000mercis','mmtro.com',NULL); * INSERT INTO tracker_domains VALUES('161media','creative-serving.com',NULL); * * These lines were primarily intended to populate the SQL table defined like this: * * CREATE TABLE tracker_domains ( * tracker TEXT NOT NULL, * domain TEXT UNIQUE NOT NULL, * notes TEXT, * FOREIGN KEY (tracker) REFERENCES trackers (id) * ); * */ function parseDomainTrackers(wtmLines) { // SQL schema // CREATE TABLE tracker_domains ( // tracker TEXT NOT NULL, // domain TEXT UNIQUE NOT NULL, // notes TEXT, // FOREIGN KEY (tracker) REFERENCES trackers (id) //); // // Example of the line we will manually parse: // INSERT INTO tracker_domains VALUES('1000mercis','mmtro.com',NULL); // INSERT INTO tracker_domains VALUES('161media','creative-serving.com',NULL); const domainsByTrackers = {} wtmLines.forEach(l => { if (l.startsWith("INSERT INTO tracker_domains VALUES(")) { const match = l.split("'") const tracker = match[1] let domains = domainsByTrackers[tracker] if (!domains) { domains = new Set() } Loading