Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Unverified Commit 5f4a085f authored by Alexandre Flament's avatar Alexandre Flament Committed by GitHub
Browse files

Merge pull request #2595 from dalf/update-wikidata-units

[mod] update wikidata_units.json and fetch_wikidata_units.py
parents 94c9320c 93d1da49
Loading
Loading
Loading
Loading
+144 −46
Original line number Diff line number Diff line
{
    "Q199": "1",
    "Q100036106": "int nmi",
    "Q100149279": "°We",
    "Q100995": "lb",
    "Q101194838": "GHz/V",
    "Q101463141": "ym²",
    "Q101463237": "zm²",
    "Q101463321": "am²",
    "Q101463409": "fm²",
    "Q101463496": "pm²",
    "Q101463679": "hm²",
    "Q101464050": "Mm²",
    "Q101464215": "Gm²",
    "Q101464369": "Tm²",
    "Q101464499": "Pm²",
    "Q101464624": "Em²",
    "Q101464753": "Zm²",
    "Q101464875": "Ym²",
    "Q101515060": "g/J",
    "Q101875087": "cd/cm²",
    "Q101877596": "g/ml",
    "Q101879174": "dm/s",
    "Q102068844": "cm⁻³",
    "Q102129339": "min⁻¹",
    "Q102129428": "h⁻¹",
    "Q102129592": "d⁻¹",
    "Q102130673": "ym/s",
    "Q102130674": "zm/s",
    "Q102130677": "am/s",
    "Q102130679": "fm/s",
    "Q102130681": "pm/s",
    "Q102130684": "nm/s",
    "Q102130686": "μm/s",
    "Q102130688": "mm/s",
    "Q102130690": "dam/s",
    "Q102130692": "hm/s",
    "Q102130694": "Mm/s",
    "Q102130696": "Gm/s",
    "Q102130698": "Tm/s",
    "Q102130700": "Pm/s",
    "Q102130702": "Em/s",
    "Q102130704": "Zm/s",
    "Q102130706": "Ym/s",
    "Q102130743": "ym/s²",
    "Q102130745": "zm/s²",
    "Q102130747": "am/s²",
    "Q102130748": "fm/s²",
    "Q102130751": "pm/s²",
    "Q102130753": "nm/s²",
    "Q102130755": "μm/s²",
    "Q102130756": "mm/s²",
    "Q102130758": "dm/s²",
    "Q102130759": "dam/s²",
    "Q102130761": "hm/s²",
    "Q102130762": "km/s²",
    "Q102130765": "Mm/s²",
    "Q102130767": "Gm/s²",
    "Q102130769": "Tm/s²",
    "Q102130771": "Pm/s²",
    "Q102130773": "Em/s²",
    "Q102130775": "Zm/s²",
    "Q102130777": "Ym/s²",
    "Q102178883": "dm³/h",
    "Q1022113": "cm³",
    "Q102573": "Bq",
    "Q103246": "Sv",
@@ -9,12 +69,16 @@
    "Q10380431": "TJ",
    "Q1040401": "das",
    "Q1040427": "hs",
    "Q1042866": "Zibit",
    "Q104117265": "Bi",
    "Q1042866": "Zib",
    "Q104907398": "μN m",
    "Q104907399": "mN m",
    "Q1050958": "inHg",
    "Q1051665": "m/s²",
    "Q1052397": "rad",
    "Q1054140": "Mm",
    "Q10543042": "Ym",
    "Q105519288": "B SPL",
    "Q1057069": "hg",
    "Q1063756": "rad/s",
    "Q1063786": "in²",
@@ -22,33 +86,34 @@
    "Q1066138": "Ps",
    "Q1067722": "Fg",
    "Q1069725": "p.",
    "Q1072404": "K",
    "Q1084321": "Tb/s",
    "Q1086691": "fg",
    "Q1091257": "tex",
    "Q1092296": "a",
    "Q1104069": "CAD$",
    "Q1104069": "$",
    "Q11061003": "μm²",
    "Q11061005": "nm²",
    "Q1131660": "st",
    "Q1137675": "cr",
    "Q1140444": "Zbit",
    "Q1140577": "Ybit",
    "Q1152074": "Pbit",
    "Q1152323": "Tbit",
    "Q1140444": "Zb",
    "Q1140577": "Yb",
    "Q1152074": "Pb",
    "Q1152323": "Tb",
    "Q1165799": "mil",
    "Q11776930": "Mg",
    "Q11830636": "psf",
    "Q11929860": "kpc",
    "Q1194225": "lbf",
    "Q1194580": "Mibit",
    "Q1195111": "Ebit",
    "Q1194580": "Mib",
    "Q1195111": "Eb",
    "Q1196837": "ω_P",
    "Q1197459": "Ms",
    "Q11982285": "Em³",
    "Q11982288": "Zm³",
    "Q11982289": "Tm³",
    "Q12011178": "Zs",
    "Q1204894": "Gibit",
    "Q1204894": "Gib",
    "Q12257695": "Eb/s",
    "Q12257696": "EB/s",
    "Q12261466": "kB/s",
@@ -59,7 +124,7 @@
    "Q12269308": "Zb/s",
    "Q12269309": "ZB/s",
    "Q1247300": "cm H₂O",
    "Q12714022": "sh cwt",
    "Q12714022": "cwt",
    "Q12789864": "GeV",
    "Q12874593": "W h",
    "Q128822": "kn",
@@ -71,9 +136,9 @@
    "Q1323615": "oz t",
    "Q132643": "kr",
    "Q13400897": "g",
    "Q13479685": "mm wg",
    "Q1351253": "Eibit",
    "Q1351334": "Pibit",
    "Q13479685": "mm H2O",
    "Q1351253": "Eib",
    "Q1351334": "Pib",
    "Q13542672": "Ry",
    "Q13548586": "THz",
    "Q13582667": "kgf/cm²",
@@ -88,13 +153,15 @@
    "Q14158377": "A_P",
    "Q14623803": "MDa",
    "Q14623804": "kDa",
    "Q1472674": "Sv",
    "Q1472674": "S",
    "Q14754979": "Zg",
    "Q14786969": "MJ",
    "Q14850704": "℧",
    "Q14913554": "Ys",
    "Q14914907": "th",
    "Q14916719": "Gpc",
    "Q14923662": "Pm³",
    "Q1501273": "HU",
    "Q1511773": "LSd",
    "Q15120301": "l atm",
    "Q1542309": "xu",
@@ -110,7 +177,7 @@
    "Q163354": "H",
    "Q1640501": "hyl",
    "Q1645498": "μg",
    "Q16859309": "lb·ft",
    "Q16859309": "lb ft",
    "Q169893": "S",
    "Q170804": "Wb",
    "Q17093295": "m/h",
@@ -140,7 +207,7 @@
    "Q182429": "m/s",
    "Q1826195": "dl",
    "Q18413919": "cm/s",
    "Q184172": "FF",
    "Q184172": "F",
    "Q185078": "a",
    "Q185153": "erg",
    "Q185648": "Torr",
@@ -171,11 +238,10 @@
    "Q2029519": "hl",
    "Q203567": "₦",
    "Q2042279": "m H₂O",
    "Q204737": "៛",
    "Q2051195": "GWh",
    "Q2055118": "ppb",
    "Q2064166": "fc",
    "Q206600": "ރ",
    "Q206600": "MRF",
    "Q20706220": "cmm",
    "Q20706221": "dmm",
    "Q2080811": "vol%",
@@ -196,9 +262,11 @@
    "Q21075844": "ml/l",
    "Q21077820": "mg/m³",
    "Q21091747": "mg/kg",
    "Q211256": "mph",
    "Q211256": "mi/h",
    "Q21154419": "PD",
    "Q211580": "BTU (th)",
    "Q212120": "A h",
    "Q213005": "G$",
    "Q2140397": "in³",
    "Q214377": "ell",
    "Q2143992": "kHz",
@@ -211,7 +279,7 @@
    "Q215571": "N m",
    "Q21604951": "g/m³",
    "Q2165290": "yd³",
    "Q216880": "kp",
    "Q216880": "kgf",
    "Q217208": "a",
    "Q2175964": "dm³",
    "Q218593": "in",
@@ -229,11 +297,14 @@
    "Q229354": "Ci",
    "Q232291": "mi²",
    "Q2332346": "ml",
    "Q235729": "y (365 days)",
    "Q23808021": "oz (ap.)",
    "Q23823681": "TW",
    "Q23925410": "gal (UK)",
    "Q23925413": "gal (US)",
    "Q23931040": "dam²",
    "Q23931103": "nmi²",
    "Q240468": "syr£",
    "Q2414435": "$b.",
    "Q242988": "Lib$",
    "Q2438073": "ag",
@@ -252,7 +323,7 @@
    "Q25511288": "mb",
    "Q2553708": "MV",
    "Q2554092": "kV",
    "Q259502": "AU$",
    "Q259502": "A$",
    "Q260126": "rem",
    "Q2612219": "Pg",
    "Q261247": "ct",
@@ -306,8 +377,11 @@
    "Q30001831": "aV",
    "Q30001832": "aW",
    "Q30001833": "aWb",
    "Q3013059": "kyr",
    "Q3194304": "kbit",
    "Q3013059": "ka",
    "Q304479": "tr",
    "Q305896": "DPI",
    "Q31889818": "ppq",
    "Q3194304": "kb",
    "Q3207456": "mW",
    "Q321017": "R",
    "Q3221356": "ym",
@@ -330,10 +404,10 @@
    "Q3312063": "fL",
    "Q3320608": "kW",
    "Q3331719": "dm²",
    "Q3332689": "ToR",
    "Q3332814": "Mbit",
    "Q3332689": "RT",
    "Q3332814": "Mb",
    "Q3396758": "daa",
    "Q3414243": "rps",
    "Q3414243": "qps",
    "Q3421309": "R_J",
    "Q3495543": "mbar",
    "Q355198": "px",
@@ -343,11 +417,11 @@
    "Q376660": "nat",
    "Q37732658": "°R",
    "Q3773454": "Mpc",
    "Q3815076": "Kibit",
    "Q3815076": "Kib",
    "Q3833309": "£",
    "Q3858002": "mA h",
    "Q3867152": "ft/s²",
    "Q389062": "Tibit",
    "Q389062": "Tib",
    "Q3902688": "pl",
    "Q3902709": "ps",
    "Q39360235": "US lea",
@@ -359,7 +433,7 @@
    "Q39462789": "µin²",
    "Q39467934": "kgf/m²",
    "Q39469927": "N/m²",
    "Q39617688": "cwt long",
    "Q39617688": "cwt",
    "Q39617818": "t lb",
    "Q39628023": "y",
    "Q39699418": "cm/s²",
@@ -367,14 +441,14 @@
    "Q39709980": "bd",
    "Q39710113": "bhp EDR",
    "Q3972226": "kL",
    "Q4041686": "iwg",
    "Q4041686": "in H20",
    "Q4068266": "Ʒ",
    "Q4176683": "aC",
    "Q420266": "oz. fl.",
    "Q420266": "fl oz",
    "Q42319606": "people/m²",
    "Q4243638": "km³",
    "Q4456994": "mF",
    "Q469356": "tn. sh.",
    "Q469356": "T",
    "Q476572": "Ha",
    "Q482798": "yd",
    "Q483261": "Da",
@@ -390,15 +464,18 @@
    "Q514845": "pz",
    "Q5195628": "hm³",
    "Q5198770": "dam³",
    "Q524410": "byr",
    "Q524410": "Ga",
    "Q5299480": "DPCm",
    "Q53393488": "PHz",
    "Q53393490": "EHz",
    "Q53393494": "ZHz",
    "Q53393498": "YHz",
    "Q53393659": "ML",
    "Q53393664": "GL",
    "Q53393669": "El",
    "Q53393674": "ZL",
    "Q53393678": "YL",
    "Q53393768": "zl",
    "Q53393771": "yL",
    "Q53393868": "GJ",
    "Q53393886": "PJ",
@@ -492,7 +569,7 @@
    "Q54083813": "Zkat",
    "Q5409016": "MVA",
    "Q5465723": "ft-pdl",
    "Q549389": "bit/s",
    "Q549389": "b/s",
    "Q550341": "V A",
    "Q552299": "ch",
    "Q55442349": "U/L",
@@ -523,6 +600,8 @@
    "Q6170164": "yg",
    "Q6171168": "zg",
    "Q61756607": "yd",
    "Q61771602": "ft",
    "Q61771670": "in",
    "Q61793198": "rd",
    "Q61794766": "ch (US survey)",
    "Q61994988": "Wth",
@@ -534,13 +613,12 @@
    "Q6414556": "kip",
    "Q648908": "bya",
    "Q64996135": "gal (US)/min",
    "Q65028392": "mm/yr",
    "Q65028392": "mm/a",
    "Q651336": "M_J",
    "Q6517513": "dag",
    "Q667419": "UK t",
    "Q681996": "M⊕",
    "Q685662": "p_P",
    "Q6859652": "mm Hg",
    "Q686163": "$",
    "Q68725821": "°Rø",
    "Q68726230": "°De",
@@ -582,20 +660,23 @@
    "Q70444514": "Ymol",
    "Q70444609": "Pmol",
    "Q712226": "km²",
    "Q717310": "Mg",
    "Q72081071": "MeV",
    "Q723733": "ms",
    "Q730251": "ft·lbf",
    "Q732707": "MHz",
    "Q73408": "K",
    "Q7350781": "Mb/s",
    "Q7398951": "PPI",
    "Q743895": "bpm",
    "Q748716": "ft/s",
    "Q750178": "‱",
    "Q752079": "RT",
    "Q752197": "kJ/mol",
    "Q7672057": "TU",
    "Q777017": "dBm",
    "Q78754556": "rot",
    "Q78756901": "rev",
    "Q78756901": "r",
    "Q78757683": "windings",
    "Q79726": "kB",
    "Q79735": "MB",
@@ -637,14 +718,16 @@
    "Q848856": "dam",
    "Q851872": "o",
    "Q854546": "Gm",
    "Q855161": "Yibit",
    "Q855161": "Yib",
    "Q856240": "ft³/min",
    "Q857027": "ft²",
    "Q85854198": "MN",
    "Q864818": "abA",
    "Q87262709": "kΩ",
    "Q87416053": "MΩ",
    "Q88296091": "tsp",
    "Q89473028": "bu (UK)",
    "Q89662131": "pt (UK)",
    "Q901492": "ph",
    "Q9026416": "MWth",
    "Q9048643": "nl",
    "Q905912": "L",
@@ -653,7 +736,9 @@
    "Q911730": "nx",
    "Q914151": "P_P",
    "Q915169": "F_P",
    "Q93318": "nmi",
    "Q93318": "M",
    "Q93678895": "gill (US)",
    "Q93679498": "gill (UK)",
    "Q940052": "q",
    "Q94076025": "dalm",
    "Q94076717": "dakat",
@@ -664,6 +749,7 @@
    "Q94415255": "GC",
    "Q94415438": "Yrad",
    "Q94415526": "YC",
    "Q94415561": "krad",
    "Q94415782": "Mrad",
    "Q94416260": "GN",
    "Q94416535": "cN",
@@ -943,6 +1029,7 @@
    "Q96106385": "h°C",
    "Q96106393": "M°C",
    "Q96236286": "G°C",
    "Q96312779": "μas",
    "Q97059641": "p°C",
    "Q97059652": "T°C",
    "Q97143826": "P°C",
@@ -953,9 +1040,21 @@
    "Q97143843": "z°C",
    "Q97143849": "Y°C",
    "Q97143851": "a°C",
    "Q98492214": "den",
    "Q98538634": "eV/m²",
    "Q98635536": "eV/m",
    "Q98642859": "eV m²/kg",
    "Q98793302": "qt (UK)",
    "Q98793408": "liq qt (US)",
    "Q98793687": "dry qt (US)",
    "Q99476928": "gf",
    "Q99487704": "ppt",
    "Q99490009": "BTU (IT)",
    "Q99490479": "BTU (39 °F)",
    "Q99490986": "BTU (59 °F)",
    "Q99491193": "BTU (60 °F)",
    "Q99491447": "BTU (mean)",
    "Q99492167": "m Hg",
    "Q11229": "%",
    "Q11570": "kg",
    "Q11573": "m",
@@ -965,8 +1064,7 @@
    "Q12129": "pc",
    "Q12438": "N",
    "Q16068": "DM",
    "Q1811": "ua",
    "Q20764": "Myr",
    "Q20764": "Ma",
    "Q2101": "e",
    "Q25235": "h",
    "Q25236": "W",
@@ -979,25 +1077,25 @@
    "Q25517": "m³",
    "Q33680": "rad",
    "Q35852": "ha",
    "Q36384": "equiv",
    "Q36384": "Eq",
    "Q3710": "ft",
    "Q39274": "Sv",
    "Q39369": "Hz",
    "Q41509": "mol",
    "Q41803": "g",
    "Q42289": "°F",
    "Q4406": "TV$",
    "Q4406": "$T",
    "Q44395": "Pa",
    "Q4587": "Le",
    "Q4588": "WS$",
    "Q4592": "F$",
    "Q4596": "Rs",
    "Q4597": "$",
    "Q47083": "Ω",
    "Q48013": "oz",
    "Q4917": "US$",
    "Q50094": "Np",
    "Q50098": "B",
    "Q531": "ly",
    "Q531": "l.y.",
    "Q5329": "dB",
    "Q573": "d",
    "Q577": "a",
+26 −17
Original line number Diff line number Diff line
@@ -12,31 +12,40 @@ from searx import searx_dir
from searx.engines.wikidata import send_wikidata_query


# SPARQL query returning one row per (unit, English symbol) pair.
#
# The response can contain the same ?item several times, each with a
# different ?symbol.  "ORDER BY ?item DESC(?rank) ?symbol" makes the row order
# deterministic even when an ?item has several symbols of the same rank, so a
# consumer that keeps only the first row per ?item gets a stable result.
#
# The rank is only reachable through the statement node (p: / ps:), not
# through the direct wdt: property, hence the two-step pattern below.
#
# see:
# * https://www.wikidata.org/wiki/Help:Ranking
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
# * https://w.wiki/32BT
#   see the result for https://www.wikidata.org/wiki/Q11582 :
#   there are multiple symbols with the same rank
SARQL_REQUEST = """
SELECT DISTINCT ?item ?symbol
WHERE
{
  ?item wdt:P31/wdt:P279 wd:Q47574 .
  ?item p:P5061 ?symbolP .
  ?symbolP ps:P5061 ?symbol ;
           wikibase:rank ?rank .
  FILTER(LANG(?symbol) = "en").
}
ORDER BY ?item DESC(?rank) ?symbol
"""


def get_data():
    """Return the Wikidata unit symbols keyed by entity id.

    Sends ``SARQL_REQUEST`` once and walks the returned bindings in the
    order given by the query.  An entity may appear in several rows (one
    per symbol); only the first row of each entity is kept, later ones are
    ignored.

    Returns:
        collections.OrderedDict: entity id (the ``item`` URI with the
        ``http://www.wikidata.org/entity/`` prefix removed) mapped to the
        ``symbol`` value, in the order of first appearance.

    NOTE(review): an earlier revision guarded against
    ``send_wikidata_query`` returning ``None``; no such guard is applied
    here -- confirm whether that case can still occur.
    """
    results = collections.OrderedDict()
    response = send_wikidata_query(SARQL_REQUEST)
    for binding in response['results']['bindings']:
        name = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
        symbol = binding['symbol']['value']
        # ignore duplicates: always keep the first symbol seen for an entity
        results.setdefault(name, symbol)
    return results


def get_wikidata_units_filename():