first commit
searxng_extra/__init__.py (new file, 2 lines)
@@ -0,0 +1,2 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
searxng_extra/docs_prebuild (new executable file, 82 lines)
@@ -0,0 +1,82 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Script that implements some prebuild tasks needed by target docs.prebuild
"""

import sys
import os.path
import time
from contextlib import contextmanager

from searx import settings, get_setting, locales
from searx.infopage import InfoPageSet, InfoPage

_doc_user = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'docs', 'user'))


def main():
    locales.locales_initialize()
    base_url = get_setting('server.base_url', None)
    if base_url:
        infopageset_ctx = _instance_infosetset_ctx(base_url)
    else:
        infopageset_ctx = _offline_infosetset_ctx()

    with infopageset_ctx as infopageset:
        for _, _, page in infopageset.iter_pages('en'):
            fname = os.path.join(_doc_user, os.path.basename(page.fname))
            with open(fname, 'w', encoding='utf-8') as f:
                f.write(page.content)


class OfflinePage(InfoPage):  # pylint: disable=missing-class-docstring

    def get_ctx(self):
        """Jinja context to render :py:obj:`DocPage.content` for offline purpose (no
        links to SearXNG instance)"""

        ctx = super().get_ctx()
        ctx['link'] = lambda name, url: '`%s`' % name
        ctx['search'] = lambda query: '`%s`' % query

        return ctx


@contextmanager
def _offline_infosetset_ctx():
    yield InfoPageSet(OfflinePage)


@contextmanager
def _instance_infosetset_ctx(base_url):
    # The url_for functions in the jinja templates need all routes to be
    # registered in the Flask app.

    settings['server']['secret_key'] = ''
    from searx.webapp import app  # pylint: disable=import-outside-toplevel

    # Specify base_url so that url_for() works for base_urls.  If base_url is
    # specified, these values are given preference over Flask's generic values
    # (see flaskfix.py).

    with app.test_request_context(base_url=base_url):
        yield InfoPageSet()

    # The searx.webapp import from above fires some HTTP requests, that's
    # why we get a RuntimeError::
    #
    #     RuntimeError: The connection pool was closed while 1 HTTP \
    #     requests/responses were still in-flight.
    #
    # Closing the network won't help ..
    #   from searx.network import network
    #   network.done()

    # Waiting some seconds before ending the command line was the only solution
    # found so far ..

    time.sleep(3)


if __name__ == '__main__':
    sys.exit(main())
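A small usage illustration of the offline Jinja helpers defined above (the page
name and URL are made up for the example; the lambdas are copied from
``OfflinePage.get_ctx``):

.. code:: python

    link = lambda name, url: '`%s`' % name
    search = lambda query: '`%s`' % query

    print(link('SearXNG', 'https://example.org'))  # prints: `SearXNG`
    print(search('!ddg privacy'))                  # prints: `!ddg privacy`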
searxng_extra/standalone_searx.py (new executable file, 177 lines)
@@ -0,0 +1,177 @@
|
||||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Script to run SearXNG from terminal.
|
||||
|
||||
DON'T USE THIS SCRIPT!!
|
||||
|
||||
.. danger::
|
||||
|
||||
Be warned, using the ``standalone_searx.py`` won't give you privacy!
|
||||
|
||||
On the contrary, this script behaves like a SearXNG server: your IP is
|
||||
exposed and tracked by all active engines (google, bing, qwant, ... ), with
|
||||
every query!
|
||||
|
||||
.. note::
|
||||
|
||||
This is an old and grumpy hack / SearXNG is a Flask application with
|
||||
client/server structure, which can't be turned into a command line tool the
|
||||
way it was done here.
|
||||
|
||||
Getting categories without initializing the engines will only return `['general']`
|
||||
|
||||
>>> import searx.engines
|
||||
... list(searx.engines.categories.keys())
|
||||
['general']
|
||||
>>> import searx.search
|
||||
... searx.search.initialize()
|
||||
... list(searx.engines.categories.keys())
|
||||
['general', 'it', 'science', 'images', 'news', 'videos', 'music', 'files', 'social media', 'map']
|
||||
|
||||
Example to use this script:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
$ python3 searxng_extra/standalone_searx.py rain
|
||||
|
||||
""" # pylint: disable=line-too-long
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from json import dumps
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import searx
|
||||
import searx.preferences
|
||||
import searx.query
|
||||
import searx.search
|
||||
import searx.webadapter
|
||||
|
||||
EngineCategoriesVar = Optional[List[str]]
|
||||
|
||||
|
||||
def get_search_query(
|
||||
args: argparse.Namespace, engine_categories: EngineCategoriesVar = None
|
||||
) -> searx.search.SearchQuery:
|
||||
"""Get search results for the query"""
|
||||
if engine_categories is None:
|
||||
engine_categories = list(searx.engines.categories.keys())
|
||||
try:
|
||||
category = args.category.decode('utf-8')
|
||||
except AttributeError:
|
||||
category = args.category
|
||||
form = {
|
||||
"q": args.query,
|
||||
"categories": category,
|
||||
"pageno": str(args.pageno),
|
||||
"language": args.lang,
|
||||
"time_range": args.timerange,
|
||||
}
|
||||
preferences = searx.preferences.Preferences(['simple'], engine_categories, searx.engines.engines, [])
|
||||
preferences.key_value_settings['safesearch'].parse(args.safesearch)
|
||||
|
||||
search_query = searx.webadapter.get_search_query_from_webapp(preferences, form)[0]
|
||||
return search_query
|
||||
|
||||
|
||||
def no_parsed_url(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Remove parsed url from dict."""
|
||||
for result in results:
|
||||
del result['parsed_url']
|
||||
return results
|
||||
|
||||
|
||||
def json_serial(obj: Any) -> Any:
|
||||
"""JSON serializer for objects not serializable by default json code.
|
||||
|
||||
:raise TypeError: raised when **obj** is not serializable
|
||||
"""
|
||||
if isinstance(obj, datetime):
|
||||
serial = obj.isoformat()
|
||||
return serial
|
||||
if isinstance(obj, bytes):
|
||||
return obj.decode('utf8')
|
||||
if isinstance(obj, set):
|
||||
return list(obj)
|
||||
raise TypeError("Type ({}) not serializable".format(type(obj)))
|
||||
|
||||
|
||||
def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]:
|
||||
"""Get result from parsed arguments."""
|
||||
result_container = searx.search.Search(search_query).search()
|
||||
result_container_json = {
|
||||
"search": {
|
||||
"q": search_query.query,
|
||||
"pageno": search_query.pageno,
|
||||
"lang": search_query.lang,
|
||||
"safesearch": search_query.safesearch,
|
||||
"timerange": search_query.time_range,
|
||||
},
|
||||
"results": no_parsed_url(result_container.get_ordered_results()),
|
||||
"infoboxes": result_container.infoboxes,
|
||||
"suggestions": list(result_container.suggestions),
|
||||
"answers": list(result_container.answers),
|
||||
"paging": result_container.paging,
|
||||
"number_of_results": result_container.number_of_results,
|
||||
}
|
||||
return result_container_json
|
||||
|
||||
|
||||
def parse_argument(
|
||||
args: Optional[List[str]] = None, category_choices: EngineCategoriesVar = None
|
||||
) -> argparse.Namespace:
|
||||
"""Parse command line.
|
||||
|
||||
:raise SystemExit: Query argument required on `args`
|
||||
|
||||
Examples:
|
||||
|
||||
>>> import importlib
|
||||
... # load module
|
||||
... spec = importlib.util.spec_from_file_location(
|
||||
... 'utils.standalone_searx', 'utils/standalone_searx.py')
|
||||
... sas = importlib.util.module_from_spec(spec)
|
||||
... spec.loader.exec_module(sas)
|
||||
... sas.parse_argument()
|
||||
usage: ptipython [-h] [--category [{general}]] [--lang [LANG]] [--pageno [PAGENO]] [--safesearch [{0,1,2}]] [--timerange [{day,week,month,year}]]
|
||||
query
|
||||
SystemExit: 2
|
||||
>>> sas.parse_argument(['rain'])
|
||||
Namespace(category='general', lang='all', pageno=1, query='rain', safesearch='0', timerange=None)
|
||||
""" # noqa: E501
|
||||
if not category_choices:
|
||||
category_choices = list(searx.engines.categories.keys())
|
||||
parser = argparse.ArgumentParser(description='Standalone searx.')
|
||||
parser.add_argument('query', type=str, help='Text query')
|
||||
parser.add_argument(
|
||||
'--category', type=str, nargs='?', choices=category_choices, default='general', help='Search category'
|
||||
)
|
||||
parser.add_argument('--lang', type=str, nargs='?', default='all', help='Search language')
|
||||
parser.add_argument('--pageno', type=int, nargs='?', default=1, help='Page number starting from 1')
|
||||
parser.add_argument(
|
||||
'--safesearch',
|
||||
type=str,
|
||||
nargs='?',
|
||||
choices=['0', '1', '2'],
|
||||
default='0',
|
||||
help='Safe content filter from none to strict',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--timerange', type=str, nargs='?', choices=['day', 'week', 'month', 'year'], help='Filter by time range'
|
||||
)
|
||||
return parser.parse_args(args)
|
||||
|
||||
|
||||
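# Stand-alone initialization: load the engines, initialize the outgoing
# network, check its configuration, set up the metrics counters and the
# engine processors; only then can the search query be built and executed.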
if __name__ == '__main__':
|
||||
settings_engines = searx.settings['engines']
|
||||
searx.search.load_engines(settings_engines)
|
||||
engine_cs = list(searx.engines.categories.keys())
|
||||
prog_args = parse_argument(category_choices=engine_cs)
|
||||
searx.search.initialize_network(settings_engines, searx.settings['outgoing'])
|
||||
searx.search.check_network_configuration()
|
||||
searx.search.initialize_metrics([engine['name'] for engine in settings_engines])
|
||||
searx.search.initialize_processors(settings_engines)
|
||||
search_q = get_search_query(prog_args, engine_categories=engine_cs)
|
||||
res_dict = to_dict(search_q)
|
||||
sys.stdout.write(dumps(res_dict, sort_keys=True, indent=4, ensure_ascii=False, default=json_serial))
|
||||
searxng_extra/update/__init__.py (new file, 2 lines)
@@ -0,0 +1,2 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
searxng_extra/update/update_ahmia_blacklist.py (new executable file, 32 lines)
@@ -0,0 +1,32 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""This script saves `Ahmia's blacklist`_ for onion sites.

Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data
... <.github/workflows/data-update.yml>`).

.. _Ahmia's blacklist: https://ahmia.fi/blacklist/

"""
# pylint: disable=use-dict-literal

import requests
from searx.data import data_dir

DATA_FILE = data_dir / 'ahmia_blacklist.txt'
URL = 'https://ahmia.fi/blacklist/banned/'


def fetch_ahmia_blacklist():
    resp = requests.get(URL, timeout=3.0)
    if resp.status_code != 200:
        # pylint: disable=broad-exception-raised
        raise Exception("Error fetching Ahmia blacklist, HTTP code " + str(resp.status_code))
    return resp.text.split()


if __name__ == '__main__':
    blacklist = fetch_ahmia_blacklist()
    blacklist.sort()
    with DATA_FILE.open("w", encoding='utf-8') as f:
        f.write('\n'.join(blacklist))
searxng_extra/update/update_currencies.py (new executable file, 155 lines)
@@ -0,0 +1,155 @@
|
||||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fetch currencies from :origin:`searx/engines/wikidata.py` engine.
|
||||
|
||||
Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ...
|
||||
<.github/workflows/data-update.yml>`).
|
||||
|
||||
"""
|
||||
|
||||
# pylint: disable=invalid-name
|
||||
|
||||
import re
|
||||
import unicodedata
|
||||
import json
|
||||
|
||||
from searx.locales import LOCALE_NAMES, locales_initialize
|
||||
from searx.engines import wikidata, set_loggers
|
||||
from searx.data.currencies import CurrenciesDB
|
||||
|
||||
set_loggers(wikidata, 'wikidata')
|
||||
locales_initialize()
|
||||
|
||||
# ORDER BY (with all the query fields) is important to keep a deterministic result order
|
||||
# so multiple invocations of this script don't change currencies.json
|
||||
SARQL_REQUEST = """
|
||||
SELECT DISTINCT ?iso4217 ?unit ?unicode ?label ?alias WHERE {
|
||||
?item wdt:P498 ?iso4217; rdfs:label ?label.
|
||||
OPTIONAL { ?item skos:altLabel ?alias FILTER (LANG (?alias) = LANG(?label)). }
|
||||
OPTIONAL { ?item wdt:P5061 ?unit. }
|
||||
OPTIONAL { ?item wdt:P489 ?symbol.
|
||||
?symbol wdt:P487 ?unicode. }
|
||||
MINUS { ?item wdt:P582 ?end_data . } # Ignore money with an end date
|
||||
MINUS { ?item wdt:P31/wdt:P279* wd:Q15893266 . } # Ignore "former entity" (obsolete currency)
|
||||
FILTER(LANG(?label) IN (%LANGUAGES_SPARQL%)).
|
||||
}
|
||||
ORDER BY ?iso4217 ?unit ?unicode ?label ?alias
|
||||
"""
|
||||
|
||||
# ORDER BY (with all the query fields) is important to keep a deterministic result order
|
||||
# so multiple invocations of this script don't change currencies.json
|
||||
SPARQL_WIKIPEDIA_NAMES_REQUEST = """
|
||||
SELECT DISTINCT ?iso4217 ?article_name WHERE {
|
||||
?item wdt:P498 ?iso4217 .
|
||||
?article schema:about ?item ;
|
||||
schema:name ?article_name ;
|
||||
schema:isPartOf [ wikibase:wikiGroup "wikipedia" ]
|
||||
MINUS { ?item wdt:P582 ?end_data . } # Ignore money with an end date
|
||||
MINUS { ?item wdt:P31/wdt:P279* wd:Q15893266 . } # Ignore "former entity" (obsolete currency)
|
||||
FILTER(LANG(?article_name) IN (%LANGUAGES_SPARQL%)).
|
||||
}
|
||||
ORDER BY ?iso4217 ?article_name
|
||||
"""
|
||||
|
||||
|
||||
LANGUAGES = LOCALE_NAMES.keys()
|
||||
LANGUAGES_SPARQL = ', '.join(set(map(lambda l: repr(l.split('_')[0]), LANGUAGES)))
|
||||
|
||||
|
||||
def remove_accents(name):
|
||||
return unicodedata.normalize('NFKD', name).lower()
|
||||
|
||||
|
||||
def remove_extra(name):
|
||||
for c in ('(', ':'):
|
||||
if c in name:
|
||||
name = name.split(c)[0].strip()
|
||||
return name
|
||||
|
||||
|
||||
def _normalize_name(name):
|
||||
name = re.sub(' +', ' ', remove_accents(name.lower()).replace('-', ' '))
|
||||
name = remove_extra(name)
|
||||
return name
|
||||
|
||||
|
||||
def add_currency_name(db, name, iso4217, normalize_name=True):
|
||||
db_names = db['names']
|
||||
|
||||
if normalize_name:
|
||||
name = _normalize_name(name)
|
||||
|
||||
iso4217_set = db_names.setdefault(name, [])
|
||||
if iso4217 not in iso4217_set:
|
||||
iso4217_set.insert(0, iso4217)
|
||||
|
||||
|
||||
def add_currency_label(db, label, iso4217, language):
|
||||
labels = db['iso4217'].setdefault(iso4217, {})
|
||||
labels[language] = label
|
||||
|
||||
|
||||
def wikidata_request_result_iterator(request):
|
||||
result = wikidata.send_wikidata_query(request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL), timeout=20)
|
||||
if result is not None:
|
||||
yield from result['results']['bindings']
|
||||
|
||||
|
||||
def fetch_db():
|
||||
db = {
|
||||
'names': {},
|
||||
'iso4217': {},
|
||||
}
|
||||
|
||||
for r in wikidata_request_result_iterator(SPARQL_WIKIPEDIA_NAMES_REQUEST):
|
||||
iso4217 = r['iso4217']['value']
|
||||
article_name = r['article_name']['value']
|
||||
article_lang = r['article_name']['xml:lang']
|
||||
add_currency_name(db, article_name, iso4217)
|
||||
add_currency_label(db, article_name, iso4217, article_lang)
|
||||
|
||||
for r in wikidata_request_result_iterator(SARQL_REQUEST):
|
||||
iso4217 = r['iso4217']['value']
|
||||
if 'label' in r:
|
||||
label = r['label']['value']
|
||||
label_lang = r['label']['xml:lang']
|
||||
add_currency_name(db, label, iso4217)
|
||||
add_currency_label(db, label, iso4217, label_lang)
|
||||
|
||||
if 'alias' in r:
|
||||
add_currency_name(db, r['alias']['value'], iso4217)
|
||||
|
||||
if 'unicode' in r:
|
||||
add_currency_name(db, r['unicode']['value'], iso4217, normalize_name=False)
|
||||
|
||||
if 'unit' in r:
|
||||
add_currency_name(db, r['unit']['value'], iso4217, normalize_name=False)
|
||||
|
||||
return db
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
db = fetch_db()
|
||||
|
||||
# static
|
||||
add_currency_name(db, "euro", 'EUR')
|
||||
add_currency_name(db, "euros", 'EUR')
|
||||
add_currency_name(db, "dollar", 'USD')
|
||||
add_currency_name(db, "dollars", 'USD')
|
||||
add_currency_name(db, "peso", 'MXN')
|
||||
add_currency_name(db, "pesos", 'MXN')
|
||||
|
||||
# reduce memory usage:
|
||||
# replace lists with one item by the item. see
|
||||
# searx.search.processors.online_currency.name_to_iso4217
|
||||
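# For example, an entry like  "euro": ["EUR"]  is stored as  "euro": "EUR";
# names that map to several ISO 4217 codes keep their list.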
for name in db['names']:
|
||||
if len(db['names'][name]) == 1:
|
||||
db['names'][name] = db['names'][name][0]
|
||||
|
||||
with CurrenciesDB.json_file.open('w', encoding='utf8') as f:
|
||||
json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
searxng_extra/update/update_engine_descriptions.py (new executable file, 371 lines)
@@ -0,0 +1,371 @@
|
||||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fetch website description from websites and from
|
||||
:origin:`searx/engines/wikidata.py` engine.
|
||||
|
||||
Output file: :origin:`searx/data/engine_descriptions.json`.
|
||||
|
||||
"""
|
||||
|
||||
# pylint: disable=invalid-name, global-statement
|
||||
|
||||
import json
|
||||
from urllib.parse import urlparse
|
||||
from os.path import join
|
||||
|
||||
from lxml.html import fromstring
|
||||
|
||||
from searx.engines import wikidata, set_loggers
|
||||
from searx.utils import extract_text, searx_useragent
|
||||
from searx.locales import LOCALE_NAMES, locales_initialize, match_locale
|
||||
from searx import searx_dir
|
||||
from searx.utils import gen_useragent, detect_language
|
||||
import searx.search
|
||||
import searx.network
|
||||
from searx.data import data_dir
|
||||
|
||||
DATA_FILE = data_dir / 'engine_descriptions.json'
|
||||
|
||||
set_loggers(wikidata, 'wikidata')
|
||||
locales_initialize()
|
||||
|
||||
# you can run the query in https://query.wikidata.org
|
||||
# replace %IDS% by Wikidata entities separated by spaces with the prefix wd:
|
||||
# for example wd:Q182496 wd:Q1540899
|
||||
# replace %LANGUAGES_SPARQL% by languages
|
||||
SPARQL_WIKIPEDIA_ARTICLE = """
|
||||
SELECT DISTINCT ?item ?name ?article ?lang
|
||||
WHERE {
|
||||
hint:Query hint:optimizer "None".
|
||||
VALUES ?item { %IDS% }
|
||||
?article schema:about ?item ;
|
||||
schema:inLanguage ?lang ;
|
||||
schema:name ?name ;
|
||||
schema:isPartOf [ wikibase:wikiGroup "wikipedia" ] .
|
||||
FILTER(?lang in (%LANGUAGES_SPARQL%)) .
|
||||
FILTER (!CONTAINS(?name, ':')) .
|
||||
}
|
||||
ORDER BY ?item ?lang
|
||||
"""
|
||||
|
||||
SPARQL_DESCRIPTION = """
|
||||
SELECT DISTINCT ?item ?itemDescription
|
||||
WHERE {
|
||||
VALUES ?item { %IDS% }
|
||||
?item schema:description ?itemDescription .
|
||||
FILTER (lang(?itemDescription) in (%LANGUAGES_SPARQL%))
|
||||
}
|
||||
ORDER BY ?itemLang
|
||||
"""
|
||||
|
||||
NOT_A_DESCRIPTION = [
|
||||
'web site',
|
||||
'site web',
|
||||
'komputa serĉilo',
|
||||
'interreta serĉilo',
|
||||
'bilaketa motor',
|
||||
'web search engine',
|
||||
'wikimedia täpsustuslehekülg',
|
||||
]
|
||||
|
||||
SKIP_ENGINE_SOURCE = [
|
||||
# fmt: off
|
||||
('gitlab', 'wikidata')
|
||||
# descriptions are about wikipedia disambiguation pages
|
||||
# fmt: on
|
||||
]
|
||||
|
||||
WIKIPEDIA_LANGUAGES = {}
|
||||
LANGUAGES_SPARQL = ''
|
||||
IDS = None
|
||||
WIKIPEDIA_LANGUAGE_VARIANTS = {'zh_Hant': 'zh-tw'}
|
||||
|
||||
|
||||
descriptions = {}
|
||||
wd_to_engine_name = {}
|
||||
|
||||
|
||||
def normalize_description(description):
|
||||
for c in [chr(c) for c in range(0, 31)]:
|
||||
description = description.replace(c, ' ')
|
||||
description = ' '.join(description.strip().split())
|
||||
return description
|
||||
|
||||
|
||||
def update_description(engine_name, lang, description, source, replace=True):
|
||||
if not isinstance(description, str):
|
||||
return
|
||||
description = normalize_description(description)
|
||||
if description.lower() == engine_name.lower():
|
||||
return
|
||||
if description.lower() in NOT_A_DESCRIPTION:
|
||||
return
|
||||
if (engine_name, source) in SKIP_ENGINE_SOURCE:
|
||||
return
|
||||
if ' ' not in description:
|
||||
# skip unique word description (like "website")
|
||||
return
|
||||
if replace or lang not in descriptions[engine_name]:
|
||||
descriptions[engine_name][lang] = [description, source]
|
||||
|
||||
|
||||
def get_wikipedia_summary(wikipedia_url, searxng_locale):
|
||||
# get the REST API URL from the HTML URL
|
||||
|
||||
# Headers
|
||||
headers = {'User-Agent': searx_useragent()}
|
||||
|
||||
if searxng_locale in WIKIPEDIA_LANGUAGE_VARIANTS:
|
||||
headers['Accept-Language'] = WIKIPEDIA_LANGUAGE_VARIANTS.get(searxng_locale)
|
||||
|
||||
# URL path : from HTML URL to REST API URL
|
||||
parsed_url = urlparse(wikipedia_url)
|
||||
# remove the /wiki/ prefix
|
||||
article_name = parsed_url.path.split('/wiki/')[1]
|
||||
# article_name is already encoded but not the / which is required for the REST API call
|
||||
encoded_article_name = article_name.replace('/', '%2F')
|
||||
path = '/api/rest_v1/page/summary/' + encoded_article_name
|
||||
wikipedia_rest_url = parsed_url._replace(path=path).geturl()
|
||||
try:
|
||||
response = searx.network.get(wikipedia_rest_url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
print(" ", wikipedia_url, e)
|
||||
return None
|
||||
api_result = json.loads(response.text)
|
||||
return api_result.get('extract')
|
||||
|
||||
|
||||
def get_website_description(url, lang1, lang2=None):
|
||||
headers = {
|
||||
'User-Agent': gen_useragent(),
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||
'DNT': '1',
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
'Sec-GPC': '1',
|
||||
'Cache-Control': 'max-age=0',
|
||||
}
|
||||
if lang1 is not None:
|
||||
lang_list = [lang1]
|
||||
if lang2 is not None:
|
||||
lang_list.append(lang2)
|
||||
headers['Accept-Language'] = f'{",".join(lang_list)};q=0.8'
|
||||
try:
|
||||
response = searx.network.get(url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
except Exception: # pylint: disable=broad-except
|
||||
return (None, None)
|
||||
|
||||
try:
|
||||
html = fromstring(response.text)
|
||||
except ValueError:
|
||||
html = fromstring(response.content)
|
||||
|
||||
description = extract_text(html.xpath('/html/head/meta[@name="description"]/@content'))
|
||||
if not description:
|
||||
description = extract_text(html.xpath('/html/head/meta[@property="og:description"]/@content'))
|
||||
if not description:
|
||||
description = extract_text(html.xpath('/html/head/title'))
|
||||
lang = extract_text(html.xpath('/html/@lang'))
|
||||
if lang is None and len(lang1) > 0:
|
||||
lang = lang1
|
||||
lang = detect_language(description) or lang or 'en'
|
||||
lang = lang.split('_')[0]
|
||||
lang = lang.split('-')[0]
|
||||
return (lang, description)
|
||||
|
||||
|
||||
def initialize():
|
||||
global IDS, LANGUAGES_SPARQL
|
||||
searx.search.initialize()
|
||||
wikipedia_engine = searx.engines.engines['wikipedia']
|
||||
|
||||
locale2lang = {'nl-BE': 'nl'}
|
||||
for sxng_ui_lang in LOCALE_NAMES:
|
||||
|
||||
sxng_ui_alias = locale2lang.get(sxng_ui_lang, sxng_ui_lang)
|
||||
wiki_lang = None
|
||||
|
||||
if sxng_ui_alias in wikipedia_engine.traits.custom['WIKIPEDIA_LANGUAGES']:
|
||||
wiki_lang = sxng_ui_alias
|
||||
if not wiki_lang:
|
||||
wiki_lang = wikipedia_engine.traits.get_language(sxng_ui_alias)
|
||||
if not wiki_lang:
|
||||
print(f"WIKIPEDIA_LANGUAGES missing {sxng_ui_lang}")
|
||||
continue
|
||||
WIKIPEDIA_LANGUAGES[sxng_ui_lang] = wiki_lang
|
||||
|
||||
LANGUAGES_SPARQL = ', '.join(f"'{l}'" for l in set(WIKIPEDIA_LANGUAGES.values()))
|
||||
for engine_name, engine in searx.engines.engines.items():
|
||||
descriptions[engine_name] = {}
|
||||
wikidata_id = getattr(engine, "about", {}).get('wikidata_id')
|
||||
if wikidata_id is not None:
|
||||
wd_to_engine_name.setdefault(wikidata_id, set()).add(engine_name)
|
||||
|
||||
IDS = ' '.join(list(map(lambda wd_id: 'wd:' + wd_id, wd_to_engine_name.keys())))
|
||||
|
||||
|
||||
def fetch_wikidata_descriptions():
|
||||
print('Fetching wikidata descriptions')
|
||||
searx.network.set_timeout_for_thread(60)
|
||||
result = wikidata.send_wikidata_query(
|
||||
SPARQL_DESCRIPTION.replace('%IDS%', IDS).replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)
|
||||
)
|
||||
if result is not None:
|
||||
for binding in result['results']['bindings']:
|
||||
wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
|
||||
wikidata_lang = binding['itemDescription']['xml:lang']
|
||||
desc = binding['itemDescription']['value']
|
||||
for engine_name in wd_to_engine_name[wikidata_id]:
|
||||
for searxng_locale in LOCALE_NAMES:
|
||||
if WIKIPEDIA_LANGUAGES[searxng_locale] != wikidata_lang:
|
||||
continue
|
||||
print(
|
||||
f" engine: {engine_name:20} / wikidata_lang: {wikidata_lang:5}",
|
||||
f"/ len(wikidata_desc): {len(desc)}",
|
||||
)
|
||||
update_description(engine_name, searxng_locale, desc, 'wikidata')
|
||||
|
||||
|
||||
def fetch_wikipedia_descriptions():
|
||||
print('Fetching wikipedia descriptions')
|
||||
result = wikidata.send_wikidata_query(
|
||||
SPARQL_WIKIPEDIA_ARTICLE.replace('%IDS%', IDS).replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)
|
||||
)
|
||||
if result is not None:
|
||||
for binding in result['results']['bindings']:
|
||||
wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
|
||||
wikidata_lang = binding['name']['xml:lang']
|
||||
wikipedia_url = binding['article']['value'] # for example the URL https://de.wikipedia.org/wiki/PubMed
|
||||
for engine_name in wd_to_engine_name[wikidata_id]:
|
||||
for searxng_locale in LOCALE_NAMES:
|
||||
if WIKIPEDIA_LANGUAGES[searxng_locale] != wikidata_lang:
|
||||
continue
|
||||
desc = get_wikipedia_summary(wikipedia_url, searxng_locale)
|
||||
if not desc:
|
||||
continue
|
||||
print(
|
||||
f" engine: {engine_name:20} / wikidata_lang: {wikidata_lang:5}",
|
||||
f"/ len(wikipedia_desc): {len(desc)}",
|
||||
)
|
||||
update_description(engine_name, searxng_locale, desc, 'wikipedia')
|
||||
|
||||
|
||||
def normalize_url(url):
|
||||
url = url.replace('{language}', 'en')
|
||||
url = urlparse(url)._replace(path='/', params='', query='', fragment='').geturl()
|
||||
url = url.replace('https://api.', 'https://')
|
||||
return url
|
||||
|
||||
|
||||
def fetch_website_description(engine_name, website):
|
||||
print(f"- fetch website descr: {engine_name} / {website}")
|
||||
default_lang, default_description = get_website_description(website, None, None)
|
||||
|
||||
if default_lang is None or default_description is None:
|
||||
# the front page can't be fetched: skip this engine
|
||||
return
|
||||
|
||||
# specify an order in which the most common languages are at the front of the
# language list
|
||||
languages = ['en', 'es', 'pt', 'ru', 'tr', 'fr']
|
||||
languages = languages + [l for l in LOCALE_NAMES if l not in languages]
|
||||
|
||||
previous_matched_lang = None
|
||||
previous_count = 0
|
||||
|
||||
for lang in languages:
|
||||
|
||||
if lang in descriptions[engine_name]:
|
||||
continue
|
||||
|
||||
fetched_lang, desc = get_website_description(website, lang, WIKIPEDIA_LANGUAGES[lang])
|
||||
if fetched_lang is None or desc is None:
|
||||
continue
|
||||
|
||||
# check if desc changed with the different lang values
|
||||
|
||||
if fetched_lang == previous_matched_lang:
|
||||
previous_count += 1
|
||||
if previous_count == 6:
|
||||
# the website has returned the same description for 6 different languages in Accept-Language header
|
||||
# stop now
|
||||
break
|
||||
else:
|
||||
previous_matched_lang = fetched_lang
|
||||
previous_count = 0
|
||||
|
||||
# Don't trust the value of fetched_lang; some websites return
# inappropriate values, for example bing-images::
#
#   requested lang: zh-Hans-CN / fetched lang: ceb / desc: 查看根据您的兴趣量身定制的提要
#
# The lang "ceb" is Cebuano, but the description is given in zh-Hans-CN
|
||||
|
||||
print(
|
||||
f" engine: {engine_name:20} / requested lang:{lang:7}"
|
||||
f" / fetched lang: {fetched_lang:7} / len(desc): {len(desc)}"
|
||||
)
|
||||
|
||||
matched_lang = match_locale(fetched_lang, LOCALE_NAMES.keys(), fallback=lang)
|
||||
update_description(engine_name, matched_lang, desc, website, replace=False)
|
||||
|
||||
|
||||
def fetch_website_descriptions():
|
||||
print('Fetching website descriptions')
|
||||
for engine_name, engine in searx.engines.engines.items():
|
||||
website = getattr(engine, "about", {}).get('website')
|
||||
if website is None and hasattr(engine, "search_url"):
|
||||
website = normalize_url(getattr(engine, "search_url"))
|
||||
if website is None and hasattr(engine, "base_url"):
|
||||
website = normalize_url(getattr(engine, "base_url"))
|
||||
if website is not None:
|
||||
fetch_website_description(engine_name, website)
|
||||
|
||||
|
||||
def get_engine_descriptions_filename():
|
||||
return join(join(searx_dir, "data"), "engine_descriptions.json")
|
||||
|
||||
|
||||
def get_output():
|
||||
"""
|
||||
From descriptions[engine][language] = [description, source]
|
||||
To
|
||||
|
||||
* output[language][engine] = description_and_source
|
||||
* description_and_source can be:
|
||||
* [description, source]
|
||||
* description (if source = "wikipedia")
|
||||
* [f"engine:lang", "ref"] (reference to another existing description)
|
||||
"""
|
||||
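# Illustrative shape of the output (engine names and texts are made up):
#
#   {
#     "en": {
#       "example": "a short engine description",          # source was "wikipedia"
#       "sample":  ["description text", "https://example.org"],
#       "mirror":  ["example:en", "ref"]                   # reuses "example"'s text
#     }
#   }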
output = {locale: {} for locale in LOCALE_NAMES}
|
||||
|
||||
seen_descriptions = {}
|
||||
|
||||
for engine_name, lang_descriptions in descriptions.items():
|
||||
for language, description in lang_descriptions.items():
|
||||
if description[0] in seen_descriptions:
|
||||
ref = seen_descriptions[description[0]]
|
||||
description = [f'{ref[0]}:{ref[1]}', 'ref']
|
||||
else:
|
||||
seen_descriptions[description[0]] = (engine_name, language)
|
||||
if description[1] == 'wikipedia':
|
||||
description = description[0]
|
||||
output.setdefault(language, {}).setdefault(engine_name, description)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def main():
|
||||
initialize()
|
||||
fetch_wikidata_descriptions()
|
||||
fetch_wikipedia_descriptions()
|
||||
fetch_website_descriptions()
|
||||
|
||||
output = get_output()
|
||||
with DATA_FILE.open('w', encoding='utf8') as f:
|
||||
f.write(json.dumps(output, indent=1, separators=(',', ':'), sort_keys=True, ensure_ascii=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
searxng_extra/update/update_engine_traits.py (new executable file, 199 lines)
@@ -0,0 +1,199 @@
|
||||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Update :py:obj:`searx.enginelib.traits.EngineTraitsMap` and :origin:`searx/languages.py`
|
||||
|
||||
:py:obj:`searx.enginelib.traits.EngineTraitsMap.ENGINE_TRAITS_FILE`:
|
||||
Persistence of engines traits, fetched from the engines.
|
||||
|
||||
:origin:`searx/languages.py`
|
||||
Is generated from intersecting each engine's supported traits.
|
||||
|
||||
The script :origin:`searxng_extra/update/update_engine_traits.py` is called in
|
||||
the :origin:`CI Update data ... <.github/workflows/data-update.yml>`
|
||||
|
||||
"""
|
||||
|
||||
# pylint: disable=invalid-name
|
||||
from unicodedata import lookup
|
||||
from pathlib import Path
|
||||
from pprint import pformat
|
||||
import babel
|
||||
|
||||
from searx import settings, searx_dir
|
||||
from searx import network
|
||||
from searx.engines import load_engines
|
||||
from searx.enginelib.traits import EngineTraitsMap
|
||||
|
||||
# Output files.
|
||||
languages_file = Path(searx_dir) / 'sxng_locales.py'
|
||||
languages_file_header = """\
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
'''List of SearXNG's locale codes used for the search language/region.
|
||||
|
||||
.. hint::
|
||||
|
||||
Don't modify this file, this file is generated by::
|
||||
|
||||
./manage data.traits
|
||||
'''
|
||||
|
||||
sxng_locales = (
|
||||
"""
|
||||
languages_file_footer = """,
|
||||
)
|
||||
'''
|
||||
A list of five-element tuples:
|
||||
|
||||
0. SearXNG's internal locale tag (a language or region tag)
|
||||
1. Name of the language (:py:obj:`babel.core.Locale.get_language_name`)
|
||||
2. For region tags the name of the region (:py:obj:`babel.core.Locale.get_territory_name`).
|
||||
Empty string for language tags.
|
||||
3. English language name (from :py:obj:`babel.core.Locale.english_name`)
|
||||
4. Unicode flag (emoji) that fits to SearXNG's internal region tag. Languages
|
||||
are represented by a globe (\U0001F310)
|
||||
|
||||
.. code:: python
|
||||
|
||||
('en', 'English', '', 'English', '\U0001f310'),
|
||||
('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
|
||||
('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
|
||||
..
|
||||
('fr', 'Français', '', 'French', '\U0001f310'),
|
||||
('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
|
||||
('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
|
||||
|
||||
:meta hide-value:
|
||||
'''
|
||||
"""
|
||||
|
||||
|
||||
lang2emoji = {
|
||||
'ha': '\U0001F1F3\U0001F1EA', # Hausa / Niger
|
||||
'bs': '\U0001F1E7\U0001F1E6', # Bosnian / Bosnia & Herzegovina
|
||||
'jp': '\U0001F1EF\U0001F1F5', # Japanese
|
||||
'ua': '\U0001F1FA\U0001F1E6', # Ukrainian
|
||||
'he': '\U0001F1EE\U0001F1F1', # Hebrew
|
||||
}
|
||||
|
||||
|
||||
def main():
|
||||
load_engines(settings['engines'])
|
||||
# traits_map = EngineTraitsMap.from_data()
|
||||
traits_map = fetch_traits_map()
|
||||
sxng_tag_list = filter_locales(traits_map)
|
||||
write_languages_file(sxng_tag_list)
|
||||
|
||||
|
||||
def fetch_traits_map():
|
||||
"""Fetches supported languages for each engine and writes json file with those."""
|
||||
network.set_timeout_for_thread(10.0)
|
||||
|
||||
def log(msg):
|
||||
print(msg)
|
||||
|
||||
traits_map = EngineTraitsMap.fetch_traits(log=log)
|
||||
print("fetched properties from %s engines" % len(traits_map))
|
||||
print("write json file: %s" % traits_map.ENGINE_TRAITS_FILE)
|
||||
traits_map.save_data()
|
||||
return traits_map
|
||||
|
||||
|
||||
def filter_locales(traits_map: EngineTraitsMap):
|
||||
"""Filter language & region tags by a threshold."""
|
||||
|
||||
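# A region tag is kept when at least min_eng_per_region engines support it, a
# language tag when at least min_eng_per_lang engines do; the languages of all
# kept regions are added as well.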
min_eng_per_region = 18
|
||||
min_eng_per_lang = 22
|
||||
|
||||
_ = {}
|
||||
for eng in traits_map.values():
|
||||
for reg in eng.regions.keys():
|
||||
_[reg] = _.get(reg, 0) + 1
|
||||
|
||||
regions = set(k for k, v in _.items() if v >= min_eng_per_region)
|
||||
lang_from_region = set(k.split('-')[0] for k in regions)
|
||||
|
||||
_ = {}
|
||||
for eng in traits_map.values():
|
||||
for lang in eng.languages.keys():
|
||||
# ignore script types like zh_Hant, zh_Hans or sr_Latin, pa_Arab (they
|
||||
# already counted by existence of 'zh' or 'sr', 'pa')
|
||||
if '_' in lang:
|
||||
# print("ignore %s" % lang)
|
||||
continue
|
||||
_[lang] = _.get(lang, 0) + 1
|
||||
|
||||
languages = set(k for k, v in _.items() if v >= min_eng_per_lang)
|
||||
|
||||
sxng_tag_list = set()
|
||||
sxng_tag_list.update(regions)
|
||||
sxng_tag_list.update(lang_from_region)
|
||||
sxng_tag_list.update(languages)
|
||||
|
||||
return sxng_tag_list
|
||||
|
||||
|
||||
def write_languages_file(sxng_tag_list):
|
||||
|
||||
language_codes = []
|
||||
|
||||
for sxng_tag in sorted(sxng_tag_list):
|
||||
sxng_locale: babel.Locale = babel.Locale.parse(sxng_tag, sep='-')
|
||||
|
||||
flag = get_unicode_flag(sxng_locale) or ''
|
||||
|
||||
item = (
|
||||
sxng_tag,
|
||||
sxng_locale.get_language_name().title(), # type: ignore
|
||||
sxng_locale.get_territory_name() or '',
|
||||
sxng_locale.english_name.split(' (')[0] if sxng_locale.english_name else '',
|
||||
UnicodeEscape(flag),
|
||||
)
|
||||
|
||||
language_codes.append(item)
|
||||
|
||||
language_codes = tuple(language_codes)
|
||||
|
||||
with languages_file.open('w', encoding='utf-8') as new_file:
|
||||
file_content = "{header} {language_codes}{footer}".format(
|
||||
header=languages_file_header,
|
||||
language_codes=pformat(language_codes, width=120, indent=4)[1:-1],
|
||||
footer=languages_file_footer,
|
||||
)
|
||||
new_file.write(file_content)
|
||||
new_file.close()
|
||||
|
||||
|
||||
class UnicodeEscape(str):
|
||||
"""Escape unicode string in :py:obj:`pprint.pformat`"""
|
||||
|
||||
def __repr__(self):
|
||||
return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'"
|
||||
|
||||
|
||||
def get_unicode_flag(locale: babel.Locale):
|
||||
"""Determine a unicode flag (emoji) that fits to the ``locale``"""
|
||||
|
||||
emoji = lang2emoji.get(locale.language)
|
||||
if emoji:
|
||||
return emoji
|
||||
|
||||
if not locale.territory:
|
||||
return '\U0001F310'
|
||||
|
||||
emoji = lang2emoji.get(locale.territory.lower())
|
||||
if emoji:
|
||||
return emoji
|
||||
|
||||
try:
|
||||
c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + locale.territory[0])
|
||||
c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + locale.territory[1])
|
||||
# print("OK : %s --> %s%s" % (locale, c1, c2))
|
||||
except KeyError as exc:
|
||||
print("ERROR: %s --> %s" % (locale, exc))
|
||||
return None
|
||||
|
||||
return c1 + c2
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
searxng_extra/update/update_external_bangs.py (new executable file, 143 lines)
@@ -0,0 +1,143 @@
|
||||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Update :origin:`searx/data/external_bangs.json` using the duckduckgo bangs
|
||||
from :py:obj:`BANGS_URL`.
|
||||
|
||||
- :origin:`CI Update data ... <.github/workflows/data-update.yml>`
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
import httpx
|
||||
|
||||
from searx.external_bang import LEAF_KEY
|
||||
from searx.data import data_dir
|
||||
|
||||
DATA_FILE = data_dir / 'external_bangs.json'
|
||||
|
||||
BANGS_URL = 'https://duckduckgo.com/bang.js'
|
||||
"""JSON file which contains the bangs."""
|
||||
|
||||
HTTPS_COLON = 'https:'
|
||||
HTTP_COLON = 'http:'
|
||||
|
||||
|
||||
def main():
|
||||
print(f'fetch bangs from {BANGS_URL}')
|
||||
response = httpx.get(BANGS_URL)
|
||||
response.raise_for_status()
|
||||
ddg_bangs = json.loads(response.content.decode())
|
||||
trie = parse_ddg_bangs(ddg_bangs)
|
||||
output = {
|
||||
'version': 0,
|
||||
'trie': trie,
|
||||
}
|
||||
with DATA_FILE.open('w', encoding="utf8") as f:
|
||||
json.dump(output, f, indent=4, sort_keys=True, ensure_ascii=False)
|
||||
|
||||
|
||||
def merge_when_no_leaf(node):
|
||||
"""Minimize the number of nodes
|
||||
|
||||
``A -> B -> C``
|
||||
|
||||
- ``B`` is child of ``A``
|
||||
- ``C`` is child of ``B``
|
||||
|
||||
If no ``C`` equals ``<LEAF_KEY>``, then each ``C`` is merged
into ``A``. For example (5 nodes)::
|
||||
|
||||
d -> d -> g -> <LEAF_KEY> (ddg)
|
||||
-> i -> g -> <LEAF_KEY> (dig)
|
||||
|
||||
becomes (3 nodes)::
|
||||
|
||||
d -> dg -> <LEAF_KEY>
|
||||
-> ig -> <LEAF_KEY>
|
||||
|
||||
"""
|
||||
restart = False
|
||||
if not isinstance(node, dict):
|
||||
return
|
||||
|
||||
# create a copy of the keys so node can be modified
|
||||
keys = list(node.keys())
|
||||
|
||||
for key in keys:
|
||||
if key == LEAF_KEY:
|
||||
continue
|
||||
|
||||
value = node[key]
|
||||
value_keys = list(value.keys())
|
||||
if LEAF_KEY not in value_keys:
|
||||
for value_key in value_keys:
|
||||
node[key + value_key] = value[value_key]
|
||||
merge_when_no_leaf(node[key + value_key])
|
||||
del node[key]
|
||||
restart = True
|
||||
else:
|
||||
merge_when_no_leaf(value)
|
||||
|
||||
if restart:
|
||||
merge_when_no_leaf(node)
|
||||
|
||||
|
||||
def optimize_leaf(parent, parent_key, node):
|
||||
if not isinstance(node, dict):
|
||||
return
|
||||
|
||||
if len(node) == 1 and LEAF_KEY in node and parent is not None:
|
||||
parent[parent_key] = node[LEAF_KEY]
|
||||
else:
|
||||
for key, value in node.items():
|
||||
optimize_leaf(node, key, value)
|
||||
|
||||
|
||||
def parse_ddg_bangs(ddg_bangs):
|
||||
bang_trie = {}
|
||||
bang_urls = {}
|
||||
|
||||
for bang_definition in ddg_bangs:
|
||||
# bang_list
|
||||
bang_url = bang_definition['u']
|
||||
if '{{{s}}}' not in bang_url:
|
||||
# ignore invalid bang
|
||||
continue
|
||||
|
||||
bang_url = bang_url.replace('{{{s}}}', chr(2))
|
||||
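# chr(2) marks the spot where the search terms are inserted later; chr(1)
# (see bang_def_output below) separates the URL from the bang's rank.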
|
||||
# only for the https protocol: "https://example.com" becomes "//example.com"
|
||||
if bang_url.startswith(HTTPS_COLON + '//'):
|
||||
bang_url = bang_url[len(HTTPS_COLON) :]
|
||||
|
||||
#
|
||||
if bang_url.startswith(HTTP_COLON + '//') and bang_url[len(HTTP_COLON) :] in bang_urls:
|
||||
# if the bang_url uses the http:// protocol, and the same URL exists in https://
|
||||
# then reuse the https:// bang definition. (written //example.com)
|
||||
bang_def_output = bang_urls[bang_url[len(HTTP_COLON) :]]
|
||||
else:
|
||||
# normal use case : new http:// URL or https:// URL (without "https:", see above)
|
||||
bang_rank = str(bang_definition['r'])
|
||||
bang_def_output = bang_url + chr(1) + bang_rank
|
||||
bang_def_output = bang_urls.setdefault(bang_url, bang_def_output)
|
||||
|
||||
bang_urls[bang_url] = bang_def_output
|
||||
|
||||
# bang name
|
||||
bang = bang_definition['t']
|
||||
|
||||
# bang_trie
|
||||
t = bang_trie
|
||||
for bang_letter in bang:
|
||||
t = t.setdefault(bang_letter, {})
|
||||
t = t.setdefault(LEAF_KEY, bang_def_output)
|
||||
|
||||
# optimize the trie
|
||||
merge_when_no_leaf(bang_trie)
|
||||
optimize_leaf(None, None, bang_trie)
|
||||
|
||||
return bang_trie
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
searxng_extra/update/update_firefox_version.py (new executable file, 80 lines)
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fetch firefox useragent signatures
|
||||
|
||||
Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
|
||||
<.github/workflows/data-update.yml>`).
|
||||
|
||||
"""
|
||||
# pylint: disable=use-dict-literal
|
||||
|
||||
import json
|
||||
import re
|
||||
from urllib.parse import urlparse, urljoin
|
||||
from packaging.version import parse
|
||||
|
||||
import requests
|
||||
from lxml import html
|
||||
from searx.data import data_dir
|
||||
|
||||
DATA_FILE = data_dir / 'useragents.json'
|
||||
|
||||
URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
|
||||
RELEASE_PATH = '/pub/firefox/releases/'
|
||||
|
||||
NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$')
|
||||
# BETA_REGEX = re.compile(r'.*[0-9]b([0-9\-a-z]+)$')
|
||||
# ESR_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?esr$')
|
||||
|
||||
#
|
||||
useragents = {
|
||||
# fmt: off
|
||||
"versions": (),
|
||||
"os": ('Windows NT 10.0; Win64; x64',
|
||||
'X11; Linux x86_64'),
|
||||
"ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}",
|
||||
# fmt: on
|
||||
}
|
||||
|
||||
|
||||
def fetch_firefox_versions():
|
||||
resp = requests.get(URL, timeout=2.0)
|
||||
if resp.status_code != 200:
|
||||
# pylint: disable=broad-exception-raised
|
||||
raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code) # type: ignore
|
||||
dom = html.fromstring(resp.text)
|
||||
versions = []
|
||||
|
||||
for link in dom.xpath('//a/@href'):
|
||||
url = urlparse(urljoin(URL, link))
|
||||
path = url.path
|
||||
if path.startswith(RELEASE_PATH):
|
||||
version = path[len(RELEASE_PATH) : -1]
|
||||
if NORMAL_REGEX.match(version):
|
||||
versions.append(parse(version))
|
||||
|
||||
list.sort(versions, reverse=True)
|
||||
return versions
|
||||
|
||||
|
||||
def fetch_firefox_last_versions():
|
||||
versions = fetch_firefox_versions()
|
||||
|
||||
result = []
|
||||
major_last = versions[0].major
|
||||
major_list = (major_last, major_last - 1)
|
||||
for version in versions:
|
||||
major_current = version.major
|
||||
minor_current = version.minor
|
||||
if major_current in major_list:
|
||||
user_agent_version = f'{major_current}.{minor_current}'
|
||||
if user_agent_version not in result:
|
||||
result.append(user_agent_version)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
useragents["versions"] = fetch_firefox_last_versions()
|
||||
with DATA_FILE.open('w', encoding='utf-8') as f:
|
||||
json.dump(useragents, f, indent=4, sort_keys=True, ensure_ascii=False)
|
||||
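As an illustration of the ``ua`` template above (the version number is
hypothetical; the real version list is fetched from ftp.mozilla.org at runtime):

.. code:: python

    ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
    print(ua.format(os='Windows NT 10.0; Win64; x64', version='121.0'))
    # Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0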
searxng_extra/update/update_locales.py (new executable file, 102 lines)
@@ -0,0 +1,102 @@
|
||||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Update locale names in :origin:`searx/data/locales.json` used by
|
||||
:ref:`searx.locales`
|
||||
|
||||
- :py:obj:`searx.locales.RTL_LOCALES`
|
||||
- :py:obj:`searx.locales.LOCALE_NAMES`
|
||||
"""
|
||||
# pylint: disable=invalid-name
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Set
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import babel
|
||||
import babel.languages
|
||||
import babel.core
|
||||
|
||||
from searx import searx_dir
|
||||
from searx.locales import (
|
||||
ADDITIONAL_TRANSLATIONS,
|
||||
LOCALE_BEST_MATCH,
|
||||
get_translation_locales,
|
||||
)
|
||||
|
||||
LOCALE_DATA_FILE = Path(searx_dir) / 'data' / 'locales.json'
|
||||
TRANSLATIONS_FOLDER = Path(searx_dir) / 'translations'
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
LOCALE_NAMES = {}
|
||||
RTL_LOCALES: Set[str] = set()
|
||||
|
||||
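# Locale names are collected from three sources, in this order: the manually
# maintained ADDITIONAL_TRANSLATIONS, the tags listed in LOCALE_BEST_MATCH and
# the translation catalogs returned by get_translation_locales().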
for tag, descr in ADDITIONAL_TRANSLATIONS.items():
|
||||
locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
|
||||
LOCALE_NAMES[tag] = descr
|
||||
if locale.text_direction == 'rtl':
|
||||
RTL_LOCALES.add(tag)
|
||||
|
||||
for tag in LOCALE_BEST_MATCH:
|
||||
descr = LOCALE_NAMES.get(tag)
|
||||
if not descr:
|
||||
locale = babel.Locale.parse(tag, sep='-')
|
||||
LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
|
||||
if locale.text_direction == 'rtl':
|
||||
RTL_LOCALES.add(tag)
|
||||
|
||||
for tr_locale in get_translation_locales():
|
||||
sxng_tag = tr_locale.replace('_', '-')
|
||||
descr = LOCALE_NAMES.get(sxng_tag)
|
||||
if not descr:
|
||||
locale = babel.Locale.parse(tr_locale)
|
||||
LOCALE_NAMES[sxng_tag] = get_locale_descr(locale, tr_locale)
|
||||
if locale.text_direction == 'rtl':
|
||||
RTL_LOCALES.add(sxng_tag)
|
||||
|
||||
content = {
|
||||
"LOCALE_NAMES": LOCALE_NAMES,
|
||||
"RTL_LOCALES": sorted(RTL_LOCALES),
|
||||
}
|
||||
|
||||
with LOCALE_DATA_FILE.open('w', encoding='utf-8') as f:
|
||||
json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False)
|
||||
|
||||
|
||||
def get_locale_descr(locale: babel.Locale, tr_locale):
|
||||
"""Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR'
|
||||
|
||||
:param locale: instance of :py:class:`Locale`
|
||||
:param tr_locale: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*)
|
||||
"""
|
||||
|
||||
native_language, native_territory = _get_locale_descr(locale, tr_locale)
|
||||
english_language, english_territory = _get_locale_descr(locale, 'en')
|
||||
|
||||
if native_territory == english_territory:
|
||||
english_territory = None
|
||||
|
||||
if not native_territory and not english_territory:
|
||||
# no territory name
|
||||
if native_language == english_language:
|
||||
return native_language
|
||||
return native_language + ' (' + english_language + ')'
|
||||
|
||||
result = native_language + ', ' + native_territory + ' (' + english_language
|
||||
if english_territory:
|
||||
return result + ', ' + english_territory + ')'
|
||||
return result + ')'
|
||||
|
||||
|
||||
def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str]:
|
||||
language_name = locale.get_language_name(tr_locale).capitalize() # type: ignore
|
||||
if language_name and ('a' <= language_name[0] <= 'z'):
|
||||
language_name = language_name.capitalize()
|
||||
territory_name: str = locale.get_territory_name(tr_locale) # type: ignore
|
||||
return language_name, territory_name
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
searxng_extra/update/update_osm_keys_tags.py (new executable file, 214 lines)
@@ -0,0 +1,214 @@
|
||||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fetch OSM keys and tags.
|
||||
|
||||
To get the i18n names, the script uses the `Wikidata Query Service`_ instead of,
for example, the `OSM tags API`_ (side note: the change log from
map.atownsend.org.uk_ might be useful to normalize OSM tags).
|
||||
|
||||
Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ...
|
||||
<.github/workflows/data-update.yml>`).
|
||||
|
||||
.. _Wikidata Query Service: https://query.wikidata.org/
|
||||
.. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc
|
||||
.. _map.atownsend.org.uk: https://map.atownsend.org.uk/maps/map/changelog.html
|
||||
|
||||
:py:obj:`SPARQL_TAGS_REQUEST` :
|
||||
Wikidata SPARQL query that returns *type-categories* and *types*. The
|
||||
returned tag is ``Tag:{category}={type}`` (see :py:func:`get_tags`).
|
||||
Example:
|
||||
|
||||
- https://taginfo.openstreetmap.org/tags/building=house#overview
|
||||
- https://wiki.openstreetmap.org/wiki/Tag:building%3Dhouse
|
||||
at the bottom of the infobox (right side), there is a link to wikidata:
|
||||
https://www.wikidata.org/wiki/Q3947
|
||||
see property "OpenStreetMap tag or key" (P1282)
|
||||
- https://wiki.openstreetmap.org/wiki/Tag%3Abuilding%3Dbungalow
|
||||
https://www.wikidata.org/wiki/Q850107
|
||||
|
||||
:py:obj:`SPARQL_KEYS_REQUEST` :
|
||||
Wikidata SPARQL query that returns *keys*. Example with "payment":
|
||||
|
||||
- https://wiki.openstreetmap.org/wiki/Key%3Apayment
|
||||
at the bottom of infobox (right side), there is a link to wikidata:
|
||||
https://www.wikidata.org/wiki/Q1148747
|
||||
link made using the "OpenStreetMap tag or key" property (P1282)
|
||||
to be confirmed: is there exactly one wiki page per key?
|
||||
- https://taginfo.openstreetmap.org/keys/payment#values
|
||||
- https://taginfo.openstreetmap.org/keys/payment:cash#values
|
||||
|
||||
``rdfs:label`` get all the labels without language selection
|
||||
(as opposed to SERVICE ``wikibase:label``).
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
import collections
|
||||
|
||||
from searx.network import set_timeout_for_thread
|
||||
from searx.engines import wikidata, set_loggers
|
||||
from searx.sxng_locales import sxng_locales
|
||||
from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
|
||||
from searx.data import data_dir
|
||||
|
||||
DATA_FILE = data_dir / 'osm_keys_tags.json'
|
||||
|
||||
set_loggers(wikidata, 'wikidata')
|
||||
|
||||
|
||||
SPARQL_TAGS_REQUEST = """
|
||||
SELECT ?tag ?item ?itemLabel WHERE {
|
||||
?item wdt:P1282 ?tag .
|
||||
?item rdfs:label ?itemLabel .
|
||||
FILTER(STRSTARTS(?tag, 'Tag'))
|
||||
}
|
||||
GROUP BY ?tag ?item ?itemLabel
|
||||
ORDER BY ?tag ?item ?itemLabel
|
||||
"""
|
||||
|
||||
SPARQL_KEYS_REQUEST = """
|
||||
SELECT ?key ?item ?itemLabel WHERE {
|
||||
?item wdt:P1282 ?key .
|
||||
?item rdfs:label ?itemLabel .
|
||||
FILTER(STRSTARTS(?key, 'Key'))
|
||||
}
|
||||
GROUP BY ?key ?item ?itemLabel
|
||||
ORDER BY ?key ?item ?itemLabel
|
||||
"""
|
||||
|
||||
LANGUAGES = [l[0].lower() for l in sxng_locales]
|
||||
|
||||
PRESET_KEYS = {
|
||||
('wikidata',): {'en': 'Wikidata'},
|
||||
('wikipedia',): {'en': 'Wikipedia'},
|
||||
('email',): {'en': 'Email'},
|
||||
('facebook',): {'en': 'Facebook'},
|
||||
('fax',): {'en': 'Fax'},
|
||||
('internet_access', 'ssid'): {'en': 'Wi-Fi'},
|
||||
}
|
||||
|
||||
INCLUDED_KEYS = {('addr',)}
|
||||
|
||||
|
||||
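# Keys and tags are stored as nested dicts; the translations of a key sit
# under the special '*' entry.  For example, the PRESET_KEYS entry
# ('internet_access', 'ssid') becomes:
#
#   {'internet_access': {'ssid': {'*': {'en': 'Wi-Fi'}}}}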
def get_preset_keys():
|
||||
results = collections.OrderedDict()
|
||||
for keys, value in PRESET_KEYS.items():
|
||||
r = results
|
||||
for k in keys:
|
||||
r = r.setdefault(k, {})
|
||||
r.setdefault('*', value)
|
||||
return results
|
||||
|
||||
|
||||
def get_keys():
|
||||
results = get_preset_keys()
|
||||
response = wikidata.send_wikidata_query(SPARQL_KEYS_REQUEST)
|
||||
|
||||
for key in response['results']['bindings']:
|
||||
keys = key['key']['value'].split(':')[1:]
|
||||
if keys[0] == 'currency' and len(keys) > 1:
|
||||
# special case in openstreetmap.py
|
||||
continue
|
||||
if keys[0] == 'contact' and len(keys) > 1:
|
||||
# label for the key "contact.email" is "Email"
|
||||
# whatever the language
|
||||
r = results.setdefault('contact', {})
|
||||
r[keys[1]] = {'*': {'en': keys[1]}}
|
||||
continue
|
||||
if tuple(keys) in PRESET_KEYS:
|
||||
# skip presets (already set above)
|
||||
continue
|
||||
if (
|
||||
get_key_rank(':'.join(keys)) is None
|
||||
and ':'.join(keys) not in VALUE_TO_LINK
|
||||
and tuple(keys) not in INCLUDED_KEYS
|
||||
):
|
||||
# keep only keys that will be displayed by openstreetmap.py
|
||||
continue
|
||||
label = key['itemLabel']['value'].lower()
|
||||
lang = key['itemLabel']['xml:lang']
|
||||
r = results
|
||||
for k in keys:
|
||||
r = r.setdefault(k, {})
|
||||
r = r.setdefault('*', {})
|
||||
if lang in LANGUAGES:
|
||||
r.setdefault(lang, label)
|
||||
|
||||
# special cases
|
||||
results['delivery']['covid19']['*'].clear()
|
||||
for k, v in results['delivery']['*'].items():
|
||||
results['delivery']['covid19']['*'][k] = v + ' (COVID19)'
|
||||
|
||||
results['opening_hours']['covid19']['*'].clear()
|
||||
for k, v in results['opening_hours']['*'].items():
|
||||
results['opening_hours']['covid19']['*'][k] = v + ' (COVID19)'
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def get_tags():
|
||||
results = collections.OrderedDict()
|
||||
response = wikidata.send_wikidata_query(SPARQL_TAGS_REQUEST)
|
||||
for tag in response['results']['bindings']:
|
||||
tag_names = tag['tag']['value'].split(':')[1].split('=')
|
||||
if len(tag_names) == 2:
|
||||
tag_category, tag_type = tag_names
|
||||
else:
|
||||
tag_category, tag_type = tag_names[0], ''
|
||||
label = tag['itemLabel']['value'].lower()
|
||||
lang = tag['itemLabel']['xml:lang']
|
||||
if lang in LANGUAGES:
|
||||
results.setdefault(tag_category, {}).setdefault(tag_type, {}).setdefault(lang, label)
|
||||
return results
|
||||
|
||||
|
||||
def optimize_data_lang(translations):
|
||||
language_to_delete = []
|
||||
# remove "zh-hk" entry if the value is the same as "zh"
|
||||
# same for "en-ca" / "en" etc...
|
||||
for language in translations:
|
||||
if '-' in language:
|
||||
base_language = language.split('-')[0]
|
||||
if translations.get(base_language) == translations.get(language):
|
||||
language_to_delete.append(language)
|
||||
|
||||
for language in language_to_delete:
|
||||
del translations[language]
|
||||
language_to_delete = []
|
||||
|
||||
# remove entries that have the same value than the "en" entry
|
||||
value_en = translations.get('en')
|
||||
if value_en:
|
||||
for language, value in translations.items():
|
||||
if language != 'en' and value == value_en:
|
||||
language_to_delete.append(language)
|
||||
|
||||
for language in language_to_delete:
|
||||
del translations[language]
|
||||
|
||||
|
||||
def optimize_tags(data):
|
||||
for v in data.values():
|
||||
for translations in v.values():
|
||||
optimize_data_lang(translations)
|
||||
return data
|
||||
|
||||
|
||||
def optimize_keys(data):
|
||||
for k, v in data.items():
|
||||
if k == '*':
|
||||
optimize_data_lang(v)
|
||||
elif isinstance(v, dict):
|
||||
optimize_keys(v)
|
||||
return data
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
set_timeout_for_thread(60)
|
||||
result = {
|
||||
'keys': optimize_keys(get_keys()),
|
||||
'tags': optimize_tags(get_tags()),
|
||||
}
|
||||
with DATA_FILE.open('w', encoding="utf8") as f:
|
||||
json.dump(result, f, indent=4, sort_keys=True, ensure_ascii=False)
|
||||
searxng_extra/update/update_pygments.py (new executable file, 72 lines)
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Update pygments style
|
||||
|
||||
Call this script after each upgrade of pygments
|
||||
|
||||
"""
|
||||
# pylint: disable=too-few-public-methods
|
||||
|
||||
from pathlib import Path
|
||||
import pygments
|
||||
from pygments.formatters.html import HtmlFormatter
|
||||
|
||||
from searx import searx_dir
|
||||
|
||||
LESS_FILE = Path(searx_dir).parent / 'client/simple/generated/pygments.less'
|
||||
|
||||
HEADER = f"""\
|
||||
/*
|
||||
this file is generated automatically by searxng_extra/update/update_pygments.py
|
||||
using pygments version {pygments.__version__}
|
||||
*/
|
||||
|
||||
"""
|
||||
|
||||
START_LIGHT_THEME = """
|
||||
.code-highlight {
|
||||
"""
|
||||
|
||||
END_LIGHT_THEME = """
|
||||
}
|
||||
"""
|
||||
|
||||
START_DARK_THEME = """
|
||||
.code-highlight-dark(){
|
||||
.code-highlight {
|
||||
"""
|
||||
|
||||
END_DARK_THEME = """
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
class Formatter(HtmlFormatter): # pylint: disable=missing-class-docstring
|
||||
@property
|
||||
def _pre_style(self):
|
||||
return 'line-height: 100%;'
|
||||
|
||||
def get_style_lines(self, arg=None):
|
||||
style_lines = []
|
||||
style_lines.extend(self.get_linenos_style_defs())
|
||||
style_lines.extend(self.get_background_style_defs(arg))
|
||||
style_lines.extend(self.get_token_style_defs(arg))
|
||||
return style_lines
|
||||
|
||||
|
||||
def generate_css(light_style, dark_style) -> str:
|
||||
css = HEADER + START_LIGHT_THEME
|
||||
for line in Formatter(style=light_style).get_style_lines():
|
||||
css += '\n ' + line
|
||||
css += END_LIGHT_THEME + START_DARK_THEME
|
||||
for line in Formatter(style=dark_style).get_style_lines():
|
||||
css += '\n ' + line
|
||||
css += END_DARK_THEME
|
||||
return css
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print("update: %s" % LESS_FILE)
|
||||
with LESS_FILE.open('w', encoding='utf8') as f:
|
||||
f.write(generate_css('default', 'lightbulb'))
|
||||
searxng_extra/update/update_wikidata_units.py (new executable file, 22 lines)
@@ -0,0 +1,22 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Fetch units from :origin:`searx/engines/wikidata.py` engine.

Output file: :origin:`searx/data/wikidata_units.json` (:origin:`CI Update data
... <.github/workflows/data-update.yml>`).

"""

import json

from searx.engines import wikidata, set_loggers
from searx.data import data_dir
from searx.wikidata_units import fetch_units

DATA_FILE = data_dir / 'wikidata_units.json'
set_loggers(wikidata, 'wikidata')


if __name__ == '__main__':
    with DATA_FILE.open('w', encoding="utf8") as f:
        json.dump(fetch_units(), f, indent=4, sort_keys=True, ensure_ascii=False)