Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 149fc182 authored by Guillaume Jacquart's avatar Guillaume Jacquart
Browse files

Merge branch '5629-add_one_trackers_list' into 'main'

feat:5629: Add whotracksme and Stevenblack trackers lists.

See merge request !11
parents 6effe006 ce7ba258
Loading
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
image: "node:16.15.0-slim"
image: "node:18.19-slim"

stages:
 - deploy
+19 −11
Original line number Diff line number Diff line
# tracker-list

List of trackers, used by PrivacyCentral app to detect and filter trackers. The list is a merge of Exodus and Adaway trackers lists, with custom /e/ rules.
List of trackers, used by PrivacyCentral app to detect and filter trackers. The list is a merge of Exodus, WhoTracksMe and StevenBlack trackers lists, with custom /e/ rules.
The list is located in list/e_trackers.json

## Auto upgrade list WIP
The list is automatically upgraded each day (if modified) with a CI script.

The list can be manually updated for now:
## Manually upgrade the list

The list can be manually updated:

1. build a new list : 
```
$ node build_list.js

   Starting list update
   adaway OK
   exodus OK
   enrich exodus OK
   Trackers list finished
   DONE
Fetch and parse Whotracksme OK - 2404 trackers
Fetch and parse Exodus OK - 260 trackers
updateWithTrackerList - baseList 3784 with otherList 260
updateWithTrackerList - remaining Others: 0
updateWithTrackerList now total of baseList 3784
updateWithTrackerList - baseList 3784 with otherList 2404
updateWithTrackerList - remaining Others: 0
updateWithTrackerList now total of baseList 3784
1 hostnames lists with each : 141886 hostnames
hostnames total : 141886
0 trackers deprecated, to remove
Trackers list finished, 3784 trackers, 21224 hostnames
No changes in trackers, abort updating.

```
2. Check the validity of the list:
+138 −141
Original line number Diff line number Diff line
const outputFile = "list/e_trackers.json"

const adawayUri = "https://adaway.org/hosts.txt"
const exodusUri = "https://reports.exodus-privacy.eu.org/api/trackers"
const EXIT_CODE_NO_CHANGES = 5

const whitelistedDomains = [
    "login.microsoftonline.com",
]

const https = require('https')
const fs = require('fs')
const url = require('url')

const EXIT_CODE_NO_CHANGES = 5

/**
 * Download a resource over HTTPS and resolve with its body as text.
 *
 * @param {string} url - Address of the resource to download.
 * @returns {Promise<string>} Resolves with the whole response body decoded as UTF-8.
 *   Rejects when the request or the response stream fails, or when the server
 *   answers with a status outside the 2xx range.
 */
function fetch(url) {
    return new Promise((resolve, reject) => {
        https.get(url, (response) => {
            const status = response.statusCode
            if (status < 200 || status >= 300) {
                // Drain the body so the socket is released, then fail loudly instead of
                // handing an error page to the parsers as if it were the expected payload.
                response.resume()
                reject(new Error("Request to " + url + " failed with status " + status))
                return
            }
            // Let the stream decode UTF-8 itself: concatenating raw Buffer chunks as
            // strings corrupts multi-byte characters that straddle a chunk boundary.
            response.setEncoding('utf8')
            let data = ""
            response.on('data', (chunk) => { data += chunk })
            response.on('end', () => resolve(data))
            response.on('error', reject)
        }).on('error', reject)
    })
}
const outputFile = "./list/e_trackers.json"

const fs = require('fs')

let adawayText = null
let exodusJson = null
const exodus = require('./src/exodus_trackers.js')
const whotracksme = require('./src/whotracksme_trackers.js')
const hostsfiles = require('./src/hostsfiles_hostnames.js')

let hostnames = []

let trackers = []
const eTrackers = require("./" + outputFile).trackers.map(it => {
    it.dirty = true
    return it 
}).sort((a, b) => b.id.localeCompare(a.id))

console.log("Starting list update")
/**
 * Remove the first occurrence of `item` from `list`, in place.
 *
 * @param {Array} list - Array to modify.
 * @param {*} item - Value to remove (matched with strict equality, as by indexOf).
 * @returns {void} The list is left untouched when `item` is not present.
 */
function remove(list, item) {
  const index = list.indexOf(item)
  // indexOf yields -1 for a missing item, and splice(-1, 1) would drop the LAST element.
  if (index !== -1) {
    list.splice(index, 1)
  }
}

fetch(adawayUri)
.then(text => {
    adawayText = text
    hostnames = parseHostToHostnames(text)
    console.log("adaway OK")
}).then(toto => {
    return fetch(exodusUri)
})
.then(response => { return JSON.parse(response) })
.then(json => {
    exoduxJson = json
    console.log("exodus OK")
})
.then(toto => {
    trackers = exodusToETrackers(exoduxJson)
function updateWithTrackerList(baseList, otherList) {
  const mergeTrackers = (base, other) => {
    base.hostnames = Array.from(new Set(base.hostnames.concat(other.hostnames)))
    base.name = other.name
    base.dirty = undefined
    base.link = (base.link) ? base.link : other.link
    base.exodusId = (base.exodusId) ? base.exodusId : other.exodusId
  }

    hostnames = applyOutbrainRule(trackers, hostnames)
    hostnames = enrichExodusWithAdaway(trackers, hostnames)
    console.log("enrich exodus OK")
    hostnames = createdLevel2Trackers(trackers, hostnames)
    console.log("define adaway trackers OK")
    removeWhitelistedHostnames(trackers)
    console.log("remove whitelisted hostnames OK")
    console.log("Trackers list finished")
})
.then(toto => {
    if (listHasNoChanges()) {
    	process.exitCode = EXIT_CODE_NO_CHANGES
        throw "No changes in trackers, abort updating."
  console.log("updateWithTrackerList - baseList " + baseList.length + " with otherList " + otherList.length)
  baseList.forEach(tracker => {    
    const sameId = otherList.find(candidate => candidate.id == tracker.id)
      if (sameId) {
        mergeTrackers(tracker, sameId)
        remove(otherList, sameId)
      } else {
        otherList.some(candidate => {
          if (tracker.hostnames.some(host => candidate.hostnames.some(h2 => h2 == host))) {
	    mergeTrackers(tracker, candidate)
            remove(otherList, candidate)
            return true
          } else {
            return false
          }
        })
.then(toto => {
    return printTrackers()
})
.then(trackersJson => {
    return saveTrackersFile(trackersJson)
})
.then(toto => {
    console.log("DONE")
})
.catch(err => {
    if (process.exitCode === undefined) {
        process.exitCode = 1
      }
    console.log(err)
   })
    
   console.log("updateWithTrackerList - remaining Others: " + otherList.length)
  
function parseHostToHostnames(hostText) {
    return hostText
        .split("\n")
        .filter(line => { return !line.startsWith("#") && line != ""})
        .map(line => line.split(" ")[1])
        .filter(it => it != "")
   Array.prototype.push.apply(baseList, otherList)
   console.log("updateWithTrackerList now total of baseList " + baseList.length)
}

function exodusToETrackers(exodusTrackers) {
    return Object.values(exodusTrackers.trackers)
    .filter(tracker => tracker.network_signature != "")
    .map(tracker => {
        const id = "exodus_" + tracker.id
        const hostnames = tracker.network_signature
            .replace(/\\/g, "")
            .split("|")
        return {
            id: id,
            hostnames: hostnames,
            name: tracker.name,
            exodusId: tracker.id
/**
 * Move every entry of `hostnameSet` whose text ends with `hostname` into the tracker.
 *
 * Matching entries are appended to `tracker.hostnames` (unless already listed),
 * the tracker's `dirty` flag is reset to undefined, and the entries are removed
 * from `hostnameSet`. Nothing is changed when no entry matches.
 *
 * @param {string} hostname - Suffix to look for.
 * @param {{hostnames: string[], dirty: *}} tracker - Tracker receiving the matches (mutated).
 * @param {Set<string>} hostnameSet - Pool of hostnames to consume from (mutated).
 */
function extractSubhostnamesToTracker(hostname, tracker, hostnameSet) {
  const consumed = []
  for (const candidate of hostnameSet) {
    if (!candidate.endsWith(hostname)) {
      continue
    }
    if (!tracker.hostnames.includes(candidate)) {
      tracker.hostnames.push(candidate)
    }
    tracker.dirty = undefined
    consumed.push(candidate)
  }
  // Deletion is deferred until the scan of the set is complete.
  for (const candidate of consumed) {
    hostnameSet.delete(candidate)
  }
}


function applyOutbrainRule(trackersList, adawayList) {
function applyOutbrainRule(trackersList, hostnameSet) {
  const tracker = trackersList.find(it => it.name === "OutBrain")
  if (tracker != null) {
        return extractSubhostnamesToTracker('outbrainimg.com', tracker, adawayList)
    extractSubhostnamesToTracker('outbrainimg.com', tracker, hostnameSet)
  }
    return adawayList
}

/**
 * Attach to the tracker every entry of `adawayList` whose text ends with `hostname`.
 *
 * `tracker.hostnames` is replaced by a de-duplicated array made of its previous
 * content followed by the matching entries.
 *
 * @param {string} hostname - Suffix to look for.
 * @param {{hostnames: string[]}} tracker - Tracker receiving the matches (mutated).
 * @param {string[]} adawayList - Hostnames to scan (not mutated).
 * @returns {string[]} New array holding the entries that did not match.
 */
function extractSubhostnamesToTracker(hostname, tracker, adawayList) {
    const merged = new Set(tracker.hostnames)
    const remaining = []
    for (const candidate of adawayList) {
        if (candidate.endsWith(hostname)) {
            merged.add(candidate)
        } else {
            remaining.push(candidate)
        }
    }
    tracker.hostnames = [...merged]
    return remaining
}

function enrichExodusWithAdaway(trackersList, adawayList) {
    trackersList.forEach(tracker => {
function enrichTrackersWithHostnames(trackerList, hostnameSet) {
  trackerList.forEach(tracker => {
    tracker.hostnames.forEach(hostname => {
            adawayList = extractSubhostnamesToTracker(hostname, tracker, adawayList)
      extractSubhostnamesToTracker(hostname, tracker, hostnameSet)
    })
  })
    return adawayList
}

/**
 * Turn every hostname still present in `adawayList` into "adaway_" trackers.
 *
 * The first remaining hostname is reduced with toLevel2Domain, a tracker named
 * after the result is appended to `trackersList`, and extractSubhostnamesToTracker
 * hands that tracker its hostnames and returns what is left. This repeats until
 * nothing is left.
 *
 * @param {Array<object>} trackersList - Tracker list that receives the new trackers (mutated).
 * @param {string[]} adawayList - Hostnames not yet attached to a tracker.
 * @returns {string[]} The list returned by the last extraction (empty once the loop ends).
 */
function createdLevel2Trackers(trackersList, adawayList) {
    let remaining = adawayList
    while (remaining.length > 0) {
        const lv2 = toLevel2Domain(remaining[0])
        const tracker = { id: "adaway_" + lv2, name: lv2, hostnames: [] }
        trackersList.push(tracker)
        remaining = extractSubhostnamesToTracker(lv2, tracker, remaining)
    }
    return remaining
}

function toLevel2Domain(hostname) {
    try {
        let parts = hostname.split("\.")
        return parts.slice(parts.length - 2, parts.length).join(".")
    } catch {
            console.log(hostname)
    }
/**
 * Strip from every tracker the two-label hostnames (exactly one dot, e.g. "example.com")
 * that are not members of `hostnameSet`. Hostnames with more or fewer labels are kept.
 *
 * Logs the size of the set before the pass and the number of distinct hostnames
 * removed after it.
 *
 * @param {Array<{hostnames: string[]}>} trackerList - Trackers to clean (hostnames mutated in place).
 * @param {Set<string>} hostnameSet - Reference hostnames.
 */
function keepOnlyHostnamesInHostnamesSet(trackerList, hostnameSet) {
  console.log("hostnameSet.size : " + hostnameSet.size)
  const isUnknownTwoLabelHost = (host) => {
    const labels = host.split(".")
    return labels.length == 2 && !hostnameSet.has(host)
  }
  const dropped = new Set()
  for (const tracker of trackerList) {
    // Snapshot the hostnames to drop first, so the array is not modified while it is scanned.
    const stale = tracker.hostnames.filter(host => isUnknownTwoLabelHost(host))
    for (const host of stale) {
      remove(tracker.hostnames, host)
      dropped.add(host)
    }
  }
  console.log("Deleted hostnames not in hostnames list : " + dropped.size)
}

function removeWhitelistedHostnames(trackersList) {
@@ -173,19 +114,75 @@ function removeWhitelistedHostnames(trackersList) {
/**
 * Tell whether the freshly built list is identical to the one already saved.
 *
 * Only the `trackers` array is compared; the `createdAt` field of the saved file
 * is ignored. Properties whose value is undefined (such as a cleared `dirty`
 * flag) are dropped by JSON.stringify and therefore do not count as a change.
 *
 * @returns {boolean} true when the serialized `eTrackers` equals the saved trackers.
 */
function listHasNoChanges() {
    // Read the saved list from disk instead of going through require(): require()
    // returns the cached module, whose tracker objects are the very ones that were
    // flagged and merged in place to build eTrackers, so edits made to an existing
    // tracker would always compare equal to themselves.
    // The path is resolved like in saveTrackersFile (relative to the working directory).
    const oldTrackers = JSON.parse(fs.readFileSync(outputFile, "utf8")).trackers
    return JSON.stringify(eTrackers) == JSON.stringify(oldTrackers)
}


/**
 * Serialize the built tracker list to the JSON text stored in the output file.
 *
 * @returns {string} JSON document with the `trackers` array (taken from `eTrackers`)
 *   and a `createdAt` ISO-8601 timestamp of the current time.
 */
function printTrackers() {
    // A single `trackers` key: the literal used to carry a second, earlier entry that
    // pointed at the obsolete `trackers` variable and was overridden by this one anyway.
    return JSON.stringify({
        trackers: eTrackers,
        createdAt: new Date().toISOString()
    })
}



/**
 * Write the serialized tracker list to the output file, replacing its content.
 *
 * @param {string} trackersJson - JSON text to store.
 */
function saveTrackersFile(trackersJson) {
    const destination = outputFile
    fs.writeFileSync(destination, trackersJson)
}


// Build pipeline: merge the remote tracker lists into eTrackers, enrich and clean
// the hostnames, drop deprecated trackers, then write the file only if it changed.
// Every step mutates eTrackers in place.
Promise.all([
    exodus.getTrackers(),
    whotracksme.getTrackers()
]).then(trackerLists => {
    // Lists are merged one after the other, in the order given to Promise.all.
    trackerLists.forEach(otherList => updateWithTrackerList(eTrackers, otherList))
    return eTrackers
}).then(eTrackers => hostsfiles.getHostnames()
).then(hostnameSet => {
    // Work on a copy: the two calls below delete the hostnames they attach to a
    // tracker from the set they receive, while the full set is still needed afterwards.
    const hostnameSetToConsume = new Set(hostnameSet)
    applyOutbrainRule(eTrackers, hostnameSetToConsume)
    enrichTrackersWithHostnames(eTrackers, hostnameSetToConsume)
    
    // Drops the two-label hostnames that are absent from the full hostname set.
    // Original author's note: this removes about 10% of the hostnames, but should
    // avoid making sites like adob.com or snapchat.com inaccessible.
    keepOnlyHostnamesInHostnamesSet(eTrackers, hostnameSet)
    removeWhitelistedHostnames(eTrackers)
    return eTrackers
}).then(eTrackers => {
  // Trackers still flagged dirty were neither merged with a remote tracker nor
  // given a hostname by the enrichment step: they are reported and removed.
  const deprecatedTrackers = eTrackers.filter(it => it.dirty)
  console.log(deprecatedTrackers.length + " trackers deprecated, to remove")
  if (deprecatedTrackers.length > 0) {
    console.log(deprecatedTrackers.map(it => it.name))
    console.log("before clean up: " + eTrackers.length)
    eTrackers.filter(it => it.dirty).forEach(it => {
      remove(eTrackers, it)
    })
    console.log("after clean up: " + eTrackers.length)
  }
   
  const trackersCount = eTrackers.length
  const hostnamesCount = eTrackers.reduce((acc, tracker) => acc + tracker.hostnames.length, 0)
  console.log("Trackers list finished, " + trackersCount + " trackers, " + hostnamesCount + " hostnames")
  return eTrackers
})
.then(trackers => {
  if (listHasNoChanges()) {
    // Set the dedicated exit code first, then throw to skip the remaining steps;
    // the catch handler below keeps an exit code that is already set.
    process.exitCode = EXIT_CODE_NO_CHANGES
    throw "No changes in trackers, abort updating."
  }
})
.then(toto => {
  return printTrackers()
})
.then(trackersJson => {
  return saveTrackersFile(trackersJson)
})
.then(toto => {
  console.log("DONE")
})
.catch(err => {
  // Any failure that did not choose its own exit code is reported as exit code 1.
  if (process.exitCode === undefined) {
    process.exitCode = 1
  }
  console.log(err)
})
+1 −1

File changed.

Preview size limit exceeded, changes collapsed.

src/exodus_trackers.js

0 → 100644
+30 −0
Original line number Diff line number Diff line
const exodusUri = "https://reports.exodus-privacy.eu.org/api/trackers"

const fetcher = require("./fetch_promise.js")

/**
 * Convert the Exodus API payload into the tracker objects used by this project.
 *
 * Entries with an empty `network_signature` are skipped. For the others, the
 * signature is stripped of its backslashes and split on "|" to obtain the hostnames.
 *
 * @param {{trackers: Object<string, object>}} exodusTrackers - Parsed Exodus response.
 * @returns {Array<{id: string, hostnames: string[], name: *, exodusId: *, link: string}>}
 *   One tracker per kept entry, in the iteration order of `exodusTrackers.trackers`.
 */
function exodusToETrackers(exodusTrackers) {
    const converted = []
    for (const entry of Object.values(exodusTrackers.trackers)) {
        if (entry.network_signature == "") {
            continue
        }
        const unescapedSignature = entry.network_signature.replace(/\\/g, "")
        converted.push({
            id: "exodus_" + entry.id,
            hostnames: unescapedSignature.split("|"),
            name: entry.name,
            exodusId: entry.id,
            link: "https://reports.exodus-privacy.eu.org/trackers/" + entry.id
        })
    }
    return converted
}

exports.getTrackers = () => fetcher.fetch(exodusUri)
  .then(response => { 
    const exodusData = JSON.parse(response) 
    const exodusTrackers = exodusToETrackers(exodusData)
    console.log("Fetch and parse Exodus OK - " + exodusTrackers.length + " trackers")
    return exodusTrackers
  })
Loading