first commit

This commit is contained in:
Iyas Altawil
2025-06-26 15:38:10 +03:30
commit e928faf6d2
899 changed files with 403713 additions and 0 deletions

299
searx/enginelib/__init__.py Normal file
View File

@@ -0,0 +1,299 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Implementations of the framework for the SearXNG engines.
- :py:obj:`searx.enginelib.EngineCache`
- :py:obj:`searx.enginelib.Engine`
- :py:obj:`searx.enginelib.traits`
There is a command line for developer purposes and for deeper analysis. Here is
an example in which the command line is called in the development environment::
$ ./manage pyenv.cmd bash --norc --noprofile
(py3) python -m searx.enginelib --help
.. hint::
The long term goal is to modularize all implementations of the engine
framework here in this Python package. ToDo:
- move implementations of the :ref:`searx.engines loader` to a new module in
the :py:obj:`searx.enginelib` namespace.
-----
"""
from __future__ import annotations
__all__ = ["EngineCache", "Engine", "ENGINES_CACHE"]
from typing import List, Callable, TYPE_CHECKING, Any
import string
import typer
from ..cache import ExpireCache, ExpireCacheCfg
if TYPE_CHECKING:
from searx.enginelib import traits
ENGINES_CACHE = ExpireCache.build_cache(
    ExpireCacheCfg(
        name="ENGINES_CACHE",
        MAXHOLD_TIME=60 * 60 * 24 * 7,  # 7 days
        MAINTENANCE_PERIOD=60 * 60,  # 1h
    )
)
"""Global :py:obj:`searx.cache.ExpireCacheSQLite` instance where the cached
values from all engines are stored.  The `MAXHOLD_TIME` is 7 days and the
`MAINTENANCE_PERIOD` is set to one hour."""

# Typer application that collects the cache related commands defined below
# (``state`` and ``maintenance``).
app = typer.Typer()
@app.command()
def state():
    """Show state for the caches of the engines."""
    # NOTE: the docstring above doubles as the help text of the CLI command,
    # keep it short and user facing.

    def _heading(text):
        # Print *text* underlined by a row of "=" of the same length.
        print(text)
        print("=" * len(text))

    # First section: report of the cache tables and their key/value pairs.
    _heading("cache tables and key/values")
    print(ENGINES_CACHE.state().report())
    print()

    # Second section: the properties of the cache database.
    _heading(f"properties of {ENGINES_CACHE.cfg.name}")
    print(str(ENGINES_CACHE.properties))  # type: ignore
@app.command()
def maintenance(force: bool = True):
    """Carry out maintenance on cache of the engines."""
    # Thin CLI wrapper: the ``force`` flag is handed over unchanged to the
    # maintenance routine of the shared ENGINES_CACHE instance.
    ENGINES_CACHE.maintenance(force=force)
class EngineCache:
"""Persistent (SQLite) key/value cache that deletes its values again after
``expire`` seconds (default/max: :py:obj:`MAXHOLD_TIME
<searx.cache.ExpireCacheCfg.MAXHOLD_TIME>`). This class is a wrapper around
:py:obj:`ENGINES_CACHE` (:py:obj:`ExpireCacheSQLite
<searx.cache.ExpireCacheSQLite>`).
In the :origin:`searx/engines/demo_offline.py` engine you can find an
exemplary implementation of such a cache other examples are implemented
in:
- :origin:`searx/engines/radio_browser.py`
- :origin:`searx/engines/soundcloud.py`
- :origin:`searx/engines/startpage.py`
.. code: python
from searx.enginelib import EngineCache
CACHE: EngineCache
def init(engine_settings):
global CACHE
CACHE = EngineCache(engine_settings["name"])
def request(query, params):
token = CACHE.get(key="token")
if token is None:
token = get_token()
# cache token of this engine for 1h
CACHE.set(key="token", value=token, expire=3600)
...
For introspection of the DB, jump into developer environment and run command to
show cache state::
$ ./manage pyenv.cmd bash --norc --noprofile
(py3) python -m searx.enginelib cache state
cache tables and key/values
===========================
[demo_offline ] 2025-04-22 11:32:50 count --> (int) 4
[startpage ] 2025-04-22 12:32:30 SC_CODE --> (str) fSOBnhEMlDfE20
[duckduckgo ] 2025-04-22 12:32:31 4dff493e.... --> (str) 4-128634958369380006627592672385352473325
[duckduckgo ] 2025-04-22 12:40:06 3e2583e2.... --> (str) 4-263126175288871260472289814259666848451
[radio_browser ] 2025-04-23 11:33:08 servers --> (list) ['https://de2.api.radio-browser.info', ...]
[soundcloud ] 2025-04-29 11:40:06 guest_client_id --> (str) EjkRJG0BLNEZquRiPZYdNtJdyGtTuHdp
[wolframalpha ] 2025-04-22 12:40:06 code --> (str) 5aa79f86205ad26188e0e26e28fb7ae7
number of tables: 6
number of key/value pairs: 7
In the "cache tables and key/values" section, the table name (engine name) is at
first position on the second there is the calculated expire date and on the
third and fourth position the key/value is shown.
About duckduckgo: The *vqd coode* of ddg depends on the query term and therefore
the key is a hash value of the query term (to not to store the raw query term).
In the "properties of ENGINES_CACHE" section all properties of the SQLiteAppl /
ExpireCache and their last modification date are shown::
properties of ENGINES_CACHE
===========================
[last modified: 2025-04-22 11:32:27] DB_SCHEMA : 1
[last modified: 2025-04-22 11:32:27] LAST_MAINTENANCE :
[last modified: 2025-04-22 11:32:27] crypt_hash : ca612e3566fdfd7cf7efe...
[last modified: 2025-04-22 11:32:30] CACHE-TABLE--demo_offline: demo_offline
[last modified: 2025-04-22 11:32:30] CACHE-TABLE--startpage: startpage
[last modified: 2025-04-22 11:32:31] CACHE-TABLE--duckduckgo: duckduckgo
[last modified: 2025-04-22 11:33:08] CACHE-TABLE--radio_browser: radio_browser
[last modified: 2025-04-22 11:40:06] CACHE-TABLE--soundcloud: soundcloud
[last modified: 2025-04-22 11:40:06] CACHE-TABLE--wolframalpha: wolframalpha
These properties provide information about the state of the ExpireCache and
control the behavior. For example, the maintenance intervals are controlled by
the last modification date of the LAST_MAINTENANCE property and the hash value
of the password can be used to detect whether the password has been changed (in
this case the DB entries can no longer be decrypted and the entire cache must be
discarded).
"""
def __init__(self, engine_name: str, expire: int | None = None):
self.expire = expire or ENGINES_CACHE.cfg.MAXHOLD_TIME
_valid = "-_." + string.ascii_letters + string.digits
self.table_name = "".join([c if c in _valid else "_" for c in engine_name])
def set(self, key: str, value: Any, expire: int | None = None) -> bool:
return ENGINES_CACHE.set(
key=key,
value=value,
expire=expire or self.expire,
ctx=self.table_name,
)
def get(self, key: str, default=None) -> Any:
return ENGINES_CACHE.get(key, default=default, ctx=self.table_name)
def secret_hash(self, name: str | bytes) -> str:
return ENGINES_CACHE.secret_hash(name=name)
class Engine: # pylint: disable=too-few-public-methods
"""Class of engine instances build from YAML settings.
Further documentation see :ref:`general engine configuration`.
.. hint::
This class is currently never initialized and only used for type hinting.
"""
# Common options in the engine module
engine_type: str
"""Type of the engine (:ref:`searx.search.processors`)"""
paging: bool
"""Engine supports multiple pages."""
time_range_support: bool
"""Engine supports search time range."""
safesearch: bool
"""Engine supports SafeSearch"""
language_support: bool
"""Engine supports languages (locales) search."""
language: str
"""For an engine, when there is ``language: ...`` in the YAML settings the engine
does support only this one language:
.. code:: yaml
- name: google french
engine: google
language: fr
"""
region: str
"""For an engine, when there is ``region: ...`` in the YAML settings the engine
does support only this one region::
.. code:: yaml
- name: google belgium
engine: google
region: fr-BE
"""
fetch_traits: Callable
"""Function to to fetch engine's traits from origin."""
traits: traits.EngineTraits
"""Traits of the engine."""
# settings.yml
categories: List[str]
"""Specifies to which :ref:`engine categories` the engine should be added."""
name: str
"""Name that will be used across SearXNG to define this engine. In settings, on
the result page .."""
engine: str
"""Name of the python file used to handle requests and responses to and from
this search engine (file name from :origin:`searx/engines` without
``.py``)."""
enable_http: bool
"""Enable HTTP (by default only HTTPS is enabled)."""
shortcut: str
"""Code used to execute bang requests (``!foo``)"""
timeout: float
"""Specific timeout for search-engine."""
display_error_messages: bool
"""Display error messages on the web UI."""
proxies: dict
"""Set proxies for a specific engine (YAML):
.. code:: yaml
proxies :
http: socks5://proxy:port
https: socks5://proxy:port
"""
disabled: bool
"""To disable by default the engine, but not deleting it. It will allow the
user to manually activate it in the settings."""
inactive: bool
"""Remove the engine from the settings (*disabled & removed*)."""
about: dict
"""Additional fields describing the engine.
.. code:: yaml
about:
website: https://example.com
wikidata_id: Q306656
official_api_documentation: https://example.com/api-doc
use_official_api: true
require_api_key: true
results: HTML
"""
using_tor_proxy: bool
"""Using tor proxy (``true``) or not (``false``) for this engine."""
send_accept_language_header: bool
"""When this option is activated, the language (locale) that is selected by
the user is used to build and send a ``Accept-Language`` header in the
request to the origin search engine."""
tokens: List[str]
"""A list of secret tokens to make this engine *private*, more details see
:ref:`private engines`."""
weight: int
"""Weighting of the results of this engine (:ref:`weight <settings engines>`)."""

View File

@@ -0,0 +1,21 @@
"""Implementation of a command line for development purposes. To start a
command, switch to the environment and run library module as a script::
$ ./manage pyenv.cmd bash --norc --noprofile
(py3) python -m searx.enginelib --help
The following commands can be used for maintenance and introspection
(development) of the engine cache::
(py3) python -m searx.enginelib cache state
(py3) python -m searx.enginelib cache maintenance
"""
import typer
from .. import enginelib
# Top-level command line application of ``python -m searx.enginelib``.
app = typer.Typer()
# Mount the commands of ``enginelib.app`` below the ``cache`` sub-command.
app.add_typer(enginelib.app, name="cache", help="Commands related to the cache of the engines.")
# Run the command line; this module is meant to be executed as a script.
app()

264
searx/enginelib/traits.py Normal file
View File

@@ -0,0 +1,264 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Engine's traits are fetched from the origin engines and stored in a JSON file
in the *data folder*. Most often traits are languages and region codes and
their mapping from SearXNG's representation to the representation in the origin
search engine. For new traits new properties can be added to the class
:py:class:`EngineTraits`.
To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be
used.
"""
from __future__ import annotations
import os
import json
import dataclasses
import types
from typing import Dict, Literal, Iterable, Union, Callable, Optional, TYPE_CHECKING
from searx import locales
from searx.data import data_dir, ENGINE_TRAITS
if TYPE_CHECKING:
from . import Engine
class EngineTraitsEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialize :class:`EngineTraits` objects,
    see :class:`json.JSONEncoder`."""

    def default(self, o):
        """Return the instance dictionary of an :class:`EngineTraits` object,
        any other object is handed over to the base class."""
        if not isinstance(o, EngineTraits):
            return super().default(o)
        return o.__dict__
@dataclasses.dataclass
class EngineTraits:
    """Traits (languages, regions, ..) of one engine; the class is intended to
    be instantiated for each engine."""

    regions: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps SearXNG's internal representation of a region to the one of the engine.

    SearXNG's internal representation can be parsed by babel and the value is
    sent to the engine:

    .. code:: python

       regions = {
           'fr-BE' : <engine's region name>,
       }

       for key, engine_region in regions.items():
          searxng_region = babel.Locale.parse(key, sep='-')
          ...
    """

    languages: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps SearXNG's internal representation of a language to the one of the engine.

    SearXNG's internal representation can be parsed by babel and the value is
    sent to the engine:

    .. code:: python

       languages = {
           'ca' : <engine's language name>,
       }

       for key, engine_lang in languages.items():
          searxng_lang = babel.Locale.parse(key)
          ...
    """

    all_locale: Optional[str] = None
    """To which locale value SearXNG's ``all`` language is mapped (shown as
    "Default language").
    """

    data_type: Literal['traits_v1'] = 'traits_v1'
    """Data type, default is 'traits_v1'.
    """

    custom: Dict[str, Union[Dict[str, Dict], Iterable[str]]] = dataclasses.field(default_factory=dict)
    """A place to store engine's custom traits, not related to the SearXNG core.
    """

    def _best_fit(self, searxng_locale: str, mapping: Dict[str, str], default=None):
        # Shared lookup of get_language / get_region: the special value 'all'
        # is answered from ``all_locale`` (if set), everything else is
        # delegated to searx.locales.get_engine_locale.
        if searxng_locale == 'all' and self.all_locale is not None:
            return self.all_locale
        return locales.get_engine_locale(searxng_locale, mapping, default=default)

    def get_language(self, searxng_locale: str, default=None):
        """Return engine's language string that *best fits* to SearXNG's locale.

        :param searxng_locale: SearXNG's internal representation of locale
          selected by the user.

        :param default: engine's default language

        The *best fits* rules are implemented in
        :py:obj:`searx.locales.get_engine_locale`.  Except for the special value
        ``all`` which is determined from :py:obj:`EngineTraits.all_locale`.
        """
        return self._best_fit(searxng_locale, self.languages, default)

    def get_region(self, searxng_locale: str, default=None):
        """Return engine's region string that *best fits* to SearXNG's locale.

        :param searxng_locale: SearXNG's internal representation of locale
          selected by the user.

        :param default: engine's default region

        The *best fits* rules are implemented in
        :py:obj:`searx.locales.get_engine_locale`.  Except for the special value
        ``all`` which is determined from :py:obj:`EngineTraits.all_locale`.
        """
        return self._best_fit(searxng_locale, self.regions, default)

    def is_locale_supported(self, searxng_locale: str) -> bool:
        """A *locale* (SearXNG's internal representation) is considered to be
        supported by the engine if the *region* or the *language* is supported
        by the engine.

        For verification the functions :py:func:`EngineTraits.get_region` and
        :py:func:`EngineTraits.get_language` are used.

        :raises TypeError: if :py:obj:`data_type` is not ``traits_v1``.
        """
        if self.data_type != 'traits_v1':
            raise TypeError('engine traits of type %s is unknown' % self.data_type)
        return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))

    def copy(self):
        """Create a copy of the dataclass object."""
        field_values = dataclasses.asdict(self)
        return EngineTraits(**field_values)

    @classmethod
    def fetch_traits(cls, engine: Engine) -> Union['EngineTraits', None]:
        """Call a function ``fetch_traits(engine_traits)`` from engines namespace
        to fetch and set properties from the origin engine in the object
        ``engine_traits``.  If the function does not exist, ``None`` is returned.
        """
        fetcher = getattr(engine, 'fetch_traits', None)
        if not fetcher:
            return None
        engine_traits = cls()
        fetcher(engine_traits)
        return engine_traits

    def set_traits(self, engine: Engine):
        """Set traits from self object in a :py:obj:`.Engine` namespace.

        :param engine: engine instance build by :py:func:`searx.engines.load_engine`
        :raises TypeError: if :py:obj:`data_type` is not ``traits_v1``.
        """
        if self.data_type != 'traits_v1':
            raise TypeError('engine traits of type %s is unknown' % self.data_type)
        self._set_traits_v1(engine)

    def _set_traits_v1(self, engine: Engine):
        # When the YAML settings of an engine contain ``language: ...`` (or
        # ``region: ...``) the engine does support only this one language
        # (region)::
        #
        #   - name: google italian
        #     engine: google
        #     language: it
        #     region: it-IT

        # work on a copy, the traits of ``self`` stay untouched
        narrowed = self.copy()
        _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"

        for option, field_name in (('language', 'languages'), ('region', 'regions')):
            if not hasattr(engine, option):
                continue
            wanted = getattr(engine, option)
            mapping = getattr(narrowed, field_name)
            if wanted not in mapping:
                raise ValueError(_msg % (engine.name, option, wanted))
            # reduce the mapping to the one configured entry
            setattr(narrowed, field_name, {wanted: mapping[wanted]})

        engine.language_support = bool(narrowed.languages or narrowed.regions)

        # set the copied & modified traits in engine's namespace
        engine.traits = narrowed
class EngineTraitsMap(Dict[str, EngineTraits]):
    """A python dictionary to map :class:`EngineTraits` by engine name."""

    ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
    """File with persistence of the :py:obj:`EngineTraitsMap`."""

    def save_data(self):
        """Store EngineTraitsMap in file :py:obj:`self.ENGINE_TRAITS_FILE`"""
        with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as json_file:
            json.dump(self, json_file, indent=2, sort_keys=True, cls=EngineTraitsEncoder)

    @classmethod
    def from_data(cls) -> 'EngineTraitsMap':
        """Instantiate :class:`EngineTraitsMap` object from :py:obj:`ENGINE_TRAITS`"""
        traits_map = cls()
        for engine_name, raw_traits in ENGINE_TRAITS.items():
            traits_map[engine_name] = EngineTraits(**raw_traits)
        return traits_map

    @classmethod
    def fetch_traits(cls, log: Callable) -> 'EngineTraitsMap':
        """Fetch the traits of all loaded engines (in alphabetical order of
        the engine names) and return them in a new :class:`EngineTraitsMap`.

        :param log: callable that is called with one message string.

        An exception raised while fetching the traits of an engine is logged
        and re-raised, unless the environment variable ``FORCE`` is set to
        ``on``, ``true`` or ``1``: in this case the old values from
        :py:obj:`ENGINE_TRAITS` are re-used (if there are any).  Engines
        without traits are not added to the map.
        """
        from searx import engines  # pylint: disable=cyclic-import, import-outside-toplevel

        traits_map = cls()

        for engine_name in sorted(engines.engines):
            engine = engines.engines[engine_name]
            engine_traits = None

            try:
                engine_traits = EngineTraits.fetch_traits(engine)
            except Exception as exc:  # pylint: disable=broad-exception-caught
                log("FATAL: while fetch_traits %s: %s" % (engine_name, exc))
                if os.environ.get('FORCE', '').lower() not in ('on', 'true', '1'):
                    raise
                old_values = ENGINE_TRAITS.get(engine_name)
                if old_values:
                    log("FORCE: re-use old values from fetch_traits - ENGINE_TRAITS[%s]" % engine_name)
                    engine_traits = EngineTraits(**old_values)

            if engine_traits is None:
                continue

            log("%-20s: SearXNG languages --> %s " % (engine_name, len(engine_traits.languages)))
            log("%-20s: SearXNG regions --> %s" % (engine_name, len(engine_traits.regions)))
            traits_map[engine_name] = engine_traits

        return traits_map

    def set_traits(self, engine: Engine | types.ModuleType):
        """Set traits in a :py:obj:`Engine` namespace.

        :param engine: engine instance build by :py:func:`searx.engines.load_engine`
        """
        if engine.name in self:
            engine_traits = self[engine.name]
        elif engine.engine in self:
            # The key of the dictionary traits_map is the *engine name*
            # configured in settings.yml.  When multiple engines are configured
            # in settings.yml to use the same origin engine (python module)
            # these additional engines can use the languages from the origin
            # engine.  For this use the configured ``engine: ...`` from
            # settings.yml
            engine_traits = self[engine.engine]
        else:
            # neither the name nor the origin engine is known: empty traits
            engine_traits = EngineTraits(data_type='traits_v1')

        engine_traits.set_traits(engine)