first commit

This commit is contained in:
Iyas Altawil
2025-06-26 15:38:10 +03:30
commit e928faf6d2
899 changed files with 403713 additions and 0 deletions

View File

@@ -0,0 +1,66 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Typification of the result items generated by the *engines*, *answerers* and
*plugins*.
.. note::
We are at the beginning of typing the results. Further typing will follow,
but this is a very large task that we will only be able to implement
gradually. For more, please read :ref:`result types`.
"""
# pylint: disable=too-few-public-methods
from __future__ import annotations
__all__ = ["Result", "MainResult", "KeyValue", "EngineResults", "AnswerSet", "Answer", "Translations", "WeatherAnswer"]
import abc
from searx import enginelib
from ._base import Result, MainResult, LegacyResult
from .answer import AnswerSet, Answer, Translations, WeatherAnswer
from .keyvalue import KeyValue
class ResultList(list, abc.ABC):
    """Abstract base class of all result lists.

    A plain :py:obj:`list` of result items, extended by the
    :py:obj:`ResultList.types` namespace and the :py:obj:`ResultList.add`
    method.
    """

    class types:  # pylint: disable=invalid-name
        """Namespace collecting the result types that are already implemented."""

        Answer = Answer
        KeyValue = KeyValue
        MainResult = MainResult
        Result = Result
        Translations = Translations
        WeatherAnswer = WeatherAnswer

        # kept for backward compatibility
        LegacyResult = LegacyResult

    def __init__(self):
        # pylint: disable=useless-parent-delegation
        super().__init__()

    def add(self, result: Result | LegacyResult):
        """Append a :py:obj:`Result` item to the end of the result list."""
        self.append(result)
class EngineResults(ResultList):
    """Result list intended for engine developers.

    The result types are reachable through the list itself (see
    :py:obj:`ResultList.types`), so an engine does not need to import them:

    .. code:: python

       from searx.result_types import EngineResults
       ...
       def response(resp) -> EngineResults:
           res = EngineResults()
           ...
           res.add( res.types.Answer(answer="lorem ipsum ..", url="https://example.org") )
           ...
           return res
    """

559
searx/result_types/_base.py Normal file
View File

@@ -0,0 +1,559 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=too-few-public-methods, missing-module-docstring
"""Basic types for the typification of results.
- :py:obj:`Result` base class
- :py:obj:`LegacyResult` for internal use only
----
.. autoclass:: Result
:members:
.. _LegacyResult:
.. autoclass:: LegacyResult
:members:
"""
from __future__ import annotations
__all__ = ["Result"]
import re
import urllib.parse
import warnings
import typing
import time
import datetime
from collections.abc import Callable
import msgspec
from searx import logger as log
# Matches any run of spaces, tabs and newlines.  It is used to collapse
# whitespace in the text fields of a result.
WHITESPACE_REGEX = re.compile(
    '( |\t|\n)+',
    flags=re.M | re.U,
)

# Unique marker object.
UNKNOWN = object()
def _normalize_url_fields(result: Result | LegacyResult):
    """Bring ``url`` / ``parsed_url`` of *result* in sync and add a default scheme.

    - If ``url`` is set and ``parsed_url`` is not, ``parsed_url`` is built from
      ``url``.  A ``url`` that is not a string is logged and reset to ``""``.
    - If ``parsed_url`` is set, a missing scheme is replaced by ``http`` and
      ``url`` is regenerated from ``parsed_url``.
    - For a :py:obj:`LegacyResult` with a truthy ``infobox`` field, the same
      default scheme is applied to the ``url`` of each item in ``urls`` and to
      the ``id`` field.
    """
    # As soon we need LegacyResult not any longer, we can move this function to
    # method Result.normalize_result_fields

    def with_default_scheme(parsed):
        # if the URL has no scheme, use http as default
        return parsed._replace(scheme=parsed.scheme or "http")

    if result.url and not result.parsed_url:
        if isinstance(result.url, str):
            result.parsed_url = urllib.parse.urlparse(result.url)
        else:
            log.debug('result: invalid URL: %s', str(result))
            result.url = ""
            result.parsed_url = None

    if result.parsed_url:
        result.parsed_url = with_default_scheme(result.parsed_url)
        result.url = result.parsed_url.geturl()

    is_infobox = isinstance(result, LegacyResult) and getattr(result, "infobox", None)
    if not is_infobox:
        return

    # As soon we have InfoboxResult, we can move this part to method
    # InfoboxResult.normalize_result_fields
    for link in getattr(result, "urls", []):
        raw_link = link.get("url")
        if raw_link:
            link["url"] = with_default_scheme(urllib.parse.urlparse(raw_link)).geturl()

    raw_id = getattr(result, "id", None)
    if raw_id:
        result.id = with_default_scheme(urllib.parse.urlparse(raw_id)).geturl()
def _normalize_text_fields(result: MainResult | LegacyResult):
    """Coerce and clean up the ``title`` and ``content`` fields of *result*.

    - A truthy ``title`` / ``content`` that is not a string is logged and
      converted with :py:obj:`str`.
    - Runs of whitespace (see ``WHITESPACE_REGEX``) are collapsed into a single
      space and the value is stripped.
    - If ``content`` equals ``title`` afterwards, ``content`` is emptied.
    """
    # As soon we need LegacyResult not any longer, we can move this function to
    # method MainResult.normalize_result_fields

    # Actually, a type check should not be necessary if the engine is
    # implemented correctly. Historically, however, we have always had a type
    # check here.
    if result.title and not isinstance(result.title, str):
        log.debug("result: invalid type of field 'title': %s", str(result))
        # convert the field value itself, not the whole result object
        result.title = str(result.title)
    if result.content and not isinstance(result.content, str):
        log.debug("result: invalid type of field 'content': %s", str(result))
        result.content = str(result.content)

    # normalize title and content
    if result.title:
        result.title = WHITESPACE_REGEX.sub(" ", result.title).strip()
    if result.content:
        result.content = WHITESPACE_REGEX.sub(" ", result.content).strip()
    if result.content == result.title:
        # avoid duplicate content between the content and title fields
        result.content = ""
def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
    """Apply *filter_func* to every URL carried by *result*.

    *filter_func* is called as ``filter_func(result, field_name, url)`` and
    answers with ``True`` (keep the URL), ``False`` (drop it) or a replacement
    value.  The plain URL fields, the infobox ``urls`` items and the
    ``image.src`` of the infobox ``attributes`` items are visited in that
    order.  Finally ``result.normalize_result_fields()`` is called.
    """
    # pylint: disable=too-many-branches, too-many-statements

    # As soon we need LegacyResult not any longer, we can move this function to
    # method Result.

    for name in ("url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail"):
        current = getattr(result, name, "")
        if not current:
            continue

        verdict = filter_func(result, name, current)
        if verdict is True:
            # URL stays unchanged
            continue
        if verdict is False:
            log.debug("filter_urls: drop field %s URL %s", name, current)
            verdict = None
        else:
            log.debug("filter_urls: modify field %s URL %s -> %s", name, current, verdict)

        setattr(result, name, verdict)
        if name != "url":
            continue
        # sync parsed_url with the new value of url
        if not verdict:
            result.parsed_url = None
        elif isinstance(verdict, str):
            result.parsed_url = urllib.parse.urlparse(verdict)

    # "urls": are from infobox
    #
    # As soon we have InfoboxResult, we can move this part to method
    # InfoboxResult.normalize_result_fields
    link_items: list[dict[str, str]] = getattr(result, "urls", [])
    if link_items:
        kept_links: list[dict[str, str]] = []
        for entry in link_items:
            current = entry.get("url")
            if not current:
                # nothing to filter, keep the item as it is
                kept_links.append(entry)
                continue

            verdict = filter_func(result, "infobox_urls", current)
            if verdict is True:
                kept_links.append(entry)
                continue
            if verdict is False:
                log.debug("filter_urls: remove URL from field 'urls' ('infobox_urls') URL %s", current)
                continue
            if verdict:
                log.debug(
                    "filter_urls: modify URL from field 'urls' ('infobox_urls') URL %s -> %s", current, verdict
                )
                entry["url"] = verdict
                kept_links.append(entry)
        setattr(result, "urls", kept_links)

    # "attributes": are from infobox
    #
    # The infobox has additional subsections for attributes, urls and relatedTopics:
    attribute_items: list[dict[str, dict]] = getattr(result, "attributes", [])
    if attribute_items:
        kept_attributes: list[dict[str, dict]] = []
        for entry in attribute_items:
            current = entry.get("image", {}).get("src", "")
            if not current:
                # no image URL to filter, keep the item as it is
                kept_attributes.append(entry)
                continue

            verdict = filter_func(result, "infobox_attributes", current)
            if verdict is True:
                kept_attributes.append(entry)
                continue
            if verdict is False:
                log.debug("filter_urls: drop field 'image.src' ('infobox_attributes') URL %s", current)
                continue
            if verdict:
                log.debug(
                    "filter_urls: modify 'image.src' ('infobox_attributes') URL %s -> %s",
                    current,
                    verdict,
                )
                entry["image"]["src"] = verdict
                kept_attributes.append(entry)
        setattr(result, "attributes", kept_attributes)

    result.normalize_result_fields()
def _normalize_date_fields(result: MainResult | LegacyResult):
    """Derive ``pubdate`` from ``publishedDate``.

    When ``publishedDate`` is truthy, it is formatted with
    ``'%Y-%m-%d %H:%M:%S%z'`` and stored in ``pubdate``.  If formatting raises
    a :py:obj:`ValueError`, ``publishedDate`` is reset to ``None``.
    """
    published = result.publishedDate
    if not published:
        # do not try to get a date from an empty string or a None type
        return
    try:
        # test if publishedDate >= 1900 (datetime module bug)
        result.pubdate = published.strftime('%Y-%m-%d %H:%M:%S%z')
    except ValueError:
        result.publishedDate = None
class Result(msgspec.Struct, kw_only=True):
    """Base class of all result types :ref:`result types`."""

    url: str | None = None
    """A link related to this *result*"""

    template: str = "default.html"
    """Name of the template used to render the result.

    By default :origin:`result_templates/default.html
    <searx/templates/simple/result_templates/default.html>` is used.
    """

    engine: str | None = ""
    """Name of the engine *this* result comes from.  In case of *plugins* a
    prefix ``plugin:`` is set, in case of *answerer* prefix ``answerer:`` is
    set.

    The field is optional and is initialized from the context if necessary.
    """

    parsed_url: urllib.parse.ParseResult | None = None
    """:py:obj:`urllib.parse.ParseResult` of :py:obj:`Result.url`.

    The field is optional and is initialized from the context if necessary.
    """

    def normalize_result_fields(self):
        """Normalize fields ``url`` and ``parsed_url``.

        - If field ``url`` is set and field ``parsed_url`` is unset, init
          ``parsed_url`` from field ``url``.  The ``url`` field is then
          regenerated from the resulting value in ``parsed_url`` (a missing
          scheme defaults to ``http``).
        """
        _normalize_url_fields(self)

    def __post_init__(self):
        pass

    def filter_urls(self, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
        """A filter function is passed in the ``filter_func`` argument to
        filter and/or modify the URLs.

        The filter function receives the :py:obj:`result object <Result>` as
        the first argument and the field name (``str``) in the second argument.
        In the third argument the URL string value is passed to the filter
        function.

        The filter function is applied to all fields that contain a URL,
        in addition to the familiar ``url`` field, these include fields such
        as::

            ["url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail"]

        and the ``urls`` list of items of the infobox.

        For each field, the filter function is called and returns a bool or a
        string value:

        - ``True``: leave URL in field unchanged
        - ``False``: remove URL field from result (or remove entire result)
        - ``str``: modified URL to be used instead

        See :ref:`filter urls example`.
        """
        _filter_urls(self, filter_func=filter_func)

    def __hash__(self) -> int:
        """Generates a hash value that uniquely identifies the content of *this*
        result.  The method can be adapted in the inheritance to compare results
        from different sources.

        If two result objects are not identical but have the same content, their
        hash values should also be identical.

        The hash value is used in contexts, e.g. when checking for equality to
        identify identical results from different sources (engines).

        This base implementation returns ``id(self)``, i.e. every object is
        only equal to itself.
        """
        return id(self)

    def __eq__(self, other):
        """:py:obj:`Result` objects are equal if the hash values of the two
        objects are equal.  If needed, it is recommended to overwrite
        :py:obj:`Result.__hash__`."""
        return hash(self) == hash(other)

    # for legacy code where a result is treated as a Python dict

    def __setitem__(self, field_name, value):
        return setattr(self, field_name, value)

    def __getitem__(self, field_name):
        if field_name not in self.__struct_fields__:
            raise KeyError(f"{field_name}")
        return getattr(self, field_name)

    def __iter__(self):
        return iter(self.__struct_fields__)

    def as_dict(self):
        """Return a new ``dict`` that maps each struct field name to its value."""
        return {f: getattr(self, f) for f in self.__struct_fields__}

    def defaults_from(self, other: Result):
        """Fields not set in *self* will be updated from the field values of the
        *other*.

        A field counts as *not set* when its value is falsy.  Only truthy
        values of *other* are taken over, fields already set in *self* are
        never overwritten.
        """
        for field_name in self.__struct_fields__:
            if getattr(self, field_name, False):
                # field is already set in self, keep it
                continue
            other_val = getattr(other, field_name, False)
            if other_val:
                setattr(self, field_name, other_val)
class MainResult(Result):  # pylint: disable=missing-class-docstring
    """Base class of all result types displayed in :ref:`area main results`."""

    title: str = ""
    """Link title of the result item."""

    content: str = ""
    """Extract or description of the result item"""

    img_src: str = ""
    """URL of an image that is displayed in the result item."""

    thumbnail: str = ""
    """URL of a thumbnail that is displayed in the result item."""

    publishedDate: datetime.datetime | None = None
    """The date on which the object was published."""

    pubdate: str = ""
    """String representation of :py:obj:`MainResult.publishedDate`"""

    length: time.struct_time | None = None
    """Playing duration in seconds."""

    views: str = ""
    """View count in humanized number format."""

    author: str = ""
    """Author of the title."""

    metadata: str = ""
    """Miscellaneous metadata."""

    priority: typing.Literal["", "high", "low"] = ""
    """The priority can be set via :ref:`hostnames plugin`, for example."""

    # NOTE(review): ``engines`` and ``positions`` use empty mutable defaults;
    # this relies on msgspec creating a fresh copy per instance -- confirm
    # against the msgspec documentation.
    engines: set[str] = set()
    """In a merged results list, the names of the engines that found this result
    are listed in this field."""

    # open_group and close_group should not be managed in the Result
    # class (we should drop it from here!)
    open_group: bool = False
    close_group: bool = False

    positions: list[int] = []
    score: float = 0
    category: str = ""

    def __hash__(self) -> int:
        """Ordinary url-results are equal if their values for
        :py:obj:`Result.template`, :py:obj:`Result.parsed_url` (without scheme)
        and :py:obj:`MainResult.img_src` are equal.

        :raises ValueError: if field ``parsed_url`` is not set.
        """
        parsed = self.parsed_url
        if not parsed:
            raise ValueError(f"missing a value in field 'parsed_url': {self}")
        fingerprint = (
            f"{self.template}"
            f"|{parsed.netloc}|{parsed.path}|{parsed.params}|{parsed.query}|{parsed.fragment}"
            f"|{self.img_src}"
        )
        return hash(fingerprint)

    def normalize_result_fields(self):
        """Normalize the URL, text and date fields and register ``engine`` in
        the ``engines`` set (if ``engine`` is set)."""
        super().normalize_result_fields()
        _normalize_text_fields(self)
        _normalize_date_fields(self)

        engine_name = self.engine
        if engine_name:
            self.engines.add(engine_name)
class LegacyResult(dict):
    """A wrapper around a legacy result item.  The SearXNG core uses this class
    for untyped dictionaries / to be downward compatible.

    This class is needed until we have implemented an :py:obj:`Result` class for
    each result type and the old usages in the codebase have been fully
    migrated.

    There is only one place where this class is used, in the
    :py:obj:`searx.results.ResultContainer`.

    Attribute access is mapped to the items of the dictionary: reading an
    attribute that is not a key of the dict raises :py:obj:`AttributeError`,
    assigning an attribute sets the item of the same name.

    .. attention::

       Do not use this class in your own implementations!
    """

    UNSET = object()

    # The annotations below only emulate the field types of the typed classes.
    # They must NOT get class level default values: a class attribute is found
    # by the normal attribute lookup before ``__getattr__`` is consulted and
    # would therefore shadow the value stored in the dict.

    # emulate field types from type class Result
    url: str | None
    template: str
    engine: str
    parsed_url: urllib.parse.ParseResult | None

    # emulate field types from type class MainResult
    title: str
    content: str
    img_src: str
    thumbnail: str
    priority: typing.Literal["", "high", "low"]
    engines: set[str]
    positions: list[int]
    score: float
    category: str
    publishedDate: datetime.datetime | None
    pubdate: str

    # infobox result
    urls: list[dict[str, str]]
    attributes: list[dict[str, str]]

    def as_dict(self):
        """Return the result as ``dict`` (the object itself, not a copy)."""
        return self

    def __init__(self, *args, **kwargs):
        """Build the dict from the arguments and fill in defaults for all
        emulated fields that are missing.

        Emits a :py:obj:`DeprecationWarning` for legacy ``answer`` items and
        for items using the ``keyvalue.html`` template.
        """
        super().__init__(*args, **kwargs)

        # emulate field types from type class Result
        self["url"] = self.get("url")
        self["template"] = self.get("template", "default.html")
        self["engine"] = self.get("engine", "")
        self["parsed_url"] = self.get("parsed_url")

        # emulate field types from type class MainResult
        self["title"] = self.get("title", "")
        self["content"] = self.get("content", "")
        self["img_src"] = self.get("img_src", "")
        self["thumbnail"] = self.get("thumbnail", "")
        self["priority"] = self.get("priority", "")
        self["engines"] = self.get("engines", set())
        # positions is a list of int, the default is an empty list
        self["positions"] = self.get("positions", [])
        self["score"] = self.get("score", 0)
        self["category"] = self.get("category", "")
        self["publishedDate"] = self.get("publishedDate")
        self["pubdate"] = self.get("pubdate", "")

        if "infobox" in self:
            self["urls"] = self.get("urls", [])
            self["attributes"] = self.get("attributes", [])

        # Legacy types that have already been ported to a type ..

        if "answer" in self:
            warnings.warn(
                f"engine {self.engine} is using deprecated `dict` for answers"
                f" / use a class from searx.result_types.answer",
                DeprecationWarning,
            )
            self.template = "answer/legacy.html"

        if self.template == "keyvalue.html":
            warnings.warn(
                f"engine {self.engine} is using deprecated `dict` for key/value results"
                f" / use a class from searx.result_types",
                DeprecationWarning,
            )

    def __getattr__(self, name: str, default=UNSET) -> typing.Any:
        """Return the dict item *name*.

        :raises AttributeError: if *name* is not a key of the dict and no
            *default* is given.
        """
        try:
            return self[name]
        except KeyError:
            if default is self.UNSET:
                raise AttributeError(f"LegacyResult object has no field named: {name}") from None
            return default

    def __setattr__(self, name: str, val):
        self[name] = val

    def __hash__(self) -> int:  # type: ignore
        """Hash value used to identify equal results.

        :raises ValueError: for ordinary url-results without ``parsed_url``.
        """
        if "answer" in self:
            # deprecated ..
            return hash(self["answer"])

        if self.template == "images.html":
            # image results are equal if their values for template, the url and
            # the img_src are equal.
            return hash(f"{self.template}|{self.url}|{self.img_src}")

        if not any(cls in self for cls in ["suggestion", "correction", "infobox", "number_of_results", "engine_data"]):
            # Ordinary url-results are equal if their values for template,
            # parsed_url (without scheme) and img_src are equal.

            # Code copied from MainResult.__hash__:
            if not self.parsed_url:
                raise ValueError(f"missing a value in field 'parsed_url': {self}")

            url = self.parsed_url
            return hash(
                f"{self.template}"
                + f"|{url.netloc}|{url.path}|{url.params}|{url.query}|{url.fragment}"
                + f"|{self.img_src}"
            )

        return id(self)

    def __eq__(self, other):
        return hash(self) == hash(other)

    def __repr__(self) -> str:
        return f"LegacyResult: {super().__repr__()}"

    def normalize_result_fields(self):
        """Normalize the date, URL and text fields and register ``engine`` in
        the ``engines`` set (if ``engine`` is set)."""
        _normalize_date_fields(self)
        _normalize_url_fields(self)
        _normalize_text_fields(self)
        if self.engine:
            self.engines.add(self.engine)

    def defaults_from(self, other: LegacyResult):
        """Items that are missing or falsy in *self* are taken from *other*."""
        for k, v in other.items():
            if not self.get(k):
                self[k] = v

    def filter_urls(self, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
        """See :py:obj:`Result.filter_urls`"""
        _filter_urls(self, filter_func=filter_func)

View File

@@ -0,0 +1,238 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Typification of the *answer* results. Results of this type are rendered in
the :origin:`answers.html <searx/templates/simple/elements/answers.html>`
template.
----
.. autoclass:: BaseAnswer
:members:
:show-inheritance:
.. autoclass:: Answer
:members:
:show-inheritance:
.. autoclass:: Translations
:members:
:show-inheritance:
.. autoclass:: WeatherAnswer
:members:
:show-inheritance:
.. autoclass:: AnswerSet
:members:
:show-inheritance:
"""
# pylint: disable=too-few-public-methods
from __future__ import annotations
__all__ = ["AnswerSet", "Answer", "Translations", "WeatherAnswer"]
from flask_babel import gettext
import msgspec
from searx import weather
from ._base import Result
class BaseAnswer(Result, kw_only=True):
    """Common base class of all answer types.

    The class is *abstract* in the sense that it is not intended to build
    instances of it.
    """
class AnswerSet:
    """Aggregator for :py:obj:`BaseAnswer` items in a result container.

    Two answers are considered the same item when their hash values are equal.
    """

    def __init__(self):
        self._answerlist = []

    def __len__(self):
        return len(self._answerlist)

    def __bool__(self):
        return len(self._answerlist) > 0

    def add(self, answer: BaseAnswer) -> None:
        """Append *answer*, unless an item with the same hash value is already
        in the set."""
        if answer not in self:
            self._answerlist.append(answer)

    def __iter__(self):
        """Sort items in this set (by template name) and iterate over the items."""
        self._answerlist.sort(key=lambda item: item.template)
        yield from self._answerlist

    def __contains__(self, answer: BaseAnswer) -> bool:
        wanted = hash(answer)
        return any(hash(item) == wanted for item in self._answerlist)
class Answer(BaseAnswer, kw_only=True):
    """Simple answer type: the *answer* is a plain string, optionally
    accompanied by the :py:obj:`url field <Result.url>` that links a resource
    (article, map, ..) related to the answer."""

    template: str = "answer/legacy.html"

    answer: str
    """Text of the answer."""

    def __hash__(self):
        """The hash value of an :py:obj:`Answer` object is the hash value of
        its field *answer*.  :py:obj:`Answer <Result.__eq__>` objects are
        equal, when the hash values of both objects are equal."""
        text = self.answer
        return hash(text)
class Translations(BaseAnswer, kw_only=True):
    """Answer type with a list of translations.

    The items in the list of :py:obj:`Translations.translations` are of type
    :py:obj:`Translations.Item`:

    .. code:: python

       def response(resp) -> EngineResults:
           res = EngineResults()
           ...
           foo_1 = res.types.Translations.Item(
               text="foobar",
               synonyms=["bar", "foo"],
               examples=["foo and bar are placeholders"],
           )
           foo_url = "https://www.deepl.com/de/translator#en/de/foo"
           ...
           res.add(res.types.Translations(translations=[foo_1], url=foo_url))
           return res
    """

    template: str = "answer/translations.html"
    """The template in :origin:`answer/translations.html
    <searx/templates/simple/answer/translations.html>`"""

    translations: list[Translations.Item]
    """List of translations."""

    def __post_init__(self):
        """Reject an empty list of translations.

        :raises ValueError: if ``translations`` is empty.
        """
        if self.translations:
            return
        raise ValueError("Translation does not have an item in the list translations")

    class Item(msgspec.Struct, kw_only=True):
        """A single element of the translations / a translation.  A translation
        consists of at least a mandatory ``text`` property (the translation),
        optional properties such as *definitions*, *synonyms* and *examples* are
        possible."""

        text: str
        """Translated text."""

        transliteration: str = ""
        """Transliteration_ of the requested translation.

        .. _Transliteration: https://en.wikipedia.org/wiki/Transliteration
        """

        examples: list[str] = []
        """List of examples for the requested translation."""

        definitions: list[str] = []
        """List of definitions for the requested translation."""

        synonyms: list[str] = []
        """List of synonyms for the requested translation."""
class WeatherAnswer(BaseAnswer, kw_only=True):
    """Answer type for weather data."""

    template: str = "answer/weather.html"
    """The template is located at :origin:`answer/weather.html
    <searx/templates/simple/answer/weather.html>`"""

    current: WeatherAnswer.Item
    """Current weather at ``location``."""

    forecasts: list[WeatherAnswer.Item] = []
    """Weather forecasts for ``location``."""

    service: str = ""
    """Weather service from which this information was provided."""

    class Item(msgspec.Struct, kw_only=True):
        """Weather parameters valid for a specific point in time."""

        location: weather.GeoLocation
        """The geo-location the weather data is from (e.g. `Berlin, Germany`)."""

        temperature: weather.Temperature
        """Air temperature at 2m above the ground."""

        condition: weather.WeatherConditionType
        """Standardized designations that summarize the weather situation
        (e.g. ``light sleet showers and thunder``)."""

        # optional fields

        datetime: weather.DateTime | None = None
        """Time of the forecast - not needed for the current weather."""

        summary: str | None = None
        """One-liner about the weather forecast / current weather conditions.
        If unset, a summary is built up from location, temperature and current
        weather conditions.
        """

        feels_like: weather.Temperature | None = None
        """Apparent temperature, the temperature equivalent perceived by
        humans, caused by the combined effects of air temperature, relative
        humidity and wind speed.  The measure is most commonly applied to the
        perceived outdoor temperature.
        """

        pressure: weather.Pressure | None = None
        """Air pressure at sea level (e.g. 1030 hPa) """

        humidity: weather.RelativeHumidity | None = None
        """Amount of relative humidity in the air at 2m above the ground.  The
        unit is ``%`` (e.g. 60%).
        """

        # NOTE(review): unlike its neighbours in this "optional fields"
        # section, ``wind_from`` has no default and is therefore a required
        # field -- confirm whether ``| None = None`` was intended.
        wind_from: weather.Compass
        """The direction the wind is coming from."""

        wind_speed: weather.WindSpeed | None = None
        """Speed of wind / wind speed at 10m above the ground (10 min average)."""

        cloud_cover: int | None = None
        """Amount of sky covered by clouds / total cloud cover for all heights
        (cloudiness, unit: %)"""

        # attributes: dict[str, str | int] = {}
        # """Key-Value dict of additional typeless weather attributes."""

        def __post_init__(self):
            """Build a default ``summary`` if none was given."""
            if self.summary:
                return
            pattern = gettext("{location}: {temperature}, {condition}")
            self.summary = pattern.format(
                location=self.location,
                temperature=self.temperature,
                condition=gettext(self.condition.capitalize()),
            )

        @property
        def url(self) -> str | None:
            """Determines a `data URL`_ with a symbol for the weather
            conditions.  If no symbol can be assigned, ``None`` is returned.

            .. _data URL:
               https://developer.mozilla.org/en-US/docs/Web/URI/Reference/Schemes/data
            """
            symbol = weather.symbol_url(self.condition)
            return symbol

View File

@@ -0,0 +1,49 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Typification of the *keyvalue* results. Results of this type are rendered in
the :origin:`keyvalue.html <searx/templates/simple/result_templates/keyvalue.html>`
template.
----
.. autoclass:: KeyValue
:members:
:show-inheritance:
"""
# pylint: disable=too-few-public-methods
from __future__ import annotations
__all__ = ["KeyValue"]
import typing
from collections import OrderedDict
from ._base import MainResult
class KeyValue(MainResult, kw_only=True):
    """Simple table view: the *key* names are shown in the first column, the
    *values* in the second column."""

    template: str = "keyvalue.html"

    kvmap: dict[str, typing.Any] | OrderedDict[str, typing.Any]
    """Dictionary with keys and values.  To sort keys, use :py:obj:`OrderedDict`."""

    caption: str = ""
    """Optional caption for this result."""

    key_title: str = ""
    """Optional title for the *key column*."""

    value_title: str = ""
    """Optional title for the *value column*."""

    def __hash__(self) -> int:
        """KeyValue objects are compared by object identity: two results are
        different from each other, even if all of their fields have the same
        values.
        """
        identity = id(self)
        return identity