first commit

This commit is contained in:
Iyas Altawil
2025-06-26 15:38:10 +03:30
commit e928faf6d2
899 changed files with 403713 additions and 0 deletions

110
searx/plugins/__init__.py Normal file
View File

@@ -0,0 +1,110 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""".. sidebar:: Further reading ..
- :ref:`plugins admin`
- :ref:`SearXNG settings <settings plugins>`
Plugins can extend or replace functionality of various components of SearXNG.
Entry points (hooks) define when a plugin runs. Right now only three hooks are
implemented. A plugin does not need to implement all of them: implement only
the hooks that fit the behaviour of your plugin.
- pre search: :py:obj:`Plugin.pre_search`
- post search: :py:obj:`Plugin.post_search`
- on each result item: :py:obj:`Plugin.on_result`
Below you will find some examples, for more coding examples have a look at the
built-in plugins :origin:`searx/plugins/` or `Only show green hosted results`_.
.. _Only show green hosted results:
https://github.com/return42/tgwf-searx-plugins/
Add Answer example
==================
Here is an example of a very simple plugin that adds a "Hello World" into the
answer area:
.. code:: python
from flask_babel import gettext as _
from searx.plugins import Plugin, PluginInfo
from searx.result_types import Answer
class MyPlugin(Plugin):
id = "hello world"
def __init__(self, plg_cfg):
super().__init__(plg_cfg)
self.info = PluginInfo(id=self.id, name=_("Hello"), description=_("demo plugin"))
def post_search(self, request, search):
return [ Answer(answer="Hello World") ]
.. _filter urls example:
Filter URLs example
===================
.. sidebar:: Further reading ..
- :py:obj:`Result.filter_urls(..) <searx.result_types._base.Result.filter_urls>`
The :py:obj:`Result.filter_urls(..) <searx.result_types._base.Result.filter_urls>`
can be used to filter and/or modify URL fields. In the following example, the
filter function ``my_url_filter``:
.. code:: python
def my_url_filter(result, field_name, url_src) -> bool | str:
if "google" in url_src:
return False # remove URL field from result
if "facebook" in url_src:
new_url = url_src.replace("facebook", "fb-dummy")
return new_url # return modified URL
return True # leave URL in field unchanged
is applied to all URL fields in the :py:obj:`Plugin.on_result` hook:
.. code:: python
class MyUrlFilter(Plugin):
...
def on_result(self, request, search, result) -> bool:
result.filter_urls(my_url_filter)
return True
Implementation
==============
.. autoclass:: Plugin
:members:
.. autoclass:: PluginInfo
:members:
.. autoclass:: PluginStorage
:members:
.. autoclass:: PluginCfg
:members:
"""
from __future__ import annotations
__all__ = ["PluginInfo", "Plugin", "PluginStorage", "PluginCfg"]
import searx
from ._core import PluginInfo, Plugin, PluginStorage, PluginCfg
STORAGE: PluginStorage = PluginStorage()
def initialize(app):
    """Fill the module-level :py:obj:`STORAGE` with the plugins configured in
    the ``plugins`` settings and run the one-time initialization of each
    plugin against the given ``app``."""
    plugin_settings = searx.get_setting("plugins")
    STORAGE.load_settings(plugin_settings)
    STORAGE.init(app)

304
searx/plugins/_core.py Normal file
View File

@@ -0,0 +1,304 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=too-few-public-methods,missing-module-docstring
from __future__ import annotations
__all__ = ["PluginInfo", "Plugin", "PluginCfg", "PluginStorage"]
import abc
import importlib
import inspect
import logging
import re
import typing
from dataclasses import dataclass, field
from searx.extended_types import SXNG_Request
from searx.result_types import Result
if typing.TYPE_CHECKING:
from searx.search import SearchWithPlugins
import flask
log: logging.Logger = logging.getLogger("searx.plugins")
@dataclass
class PluginInfo:
"""Object that holds information about a *plugin*, these infos are shown to
the user in the Preferences menu.
To be able to translate the information into other languages, the text must
be written in English and translated with :py:obj:`flask_babel.gettext`.
"""
id: str
"""The ID-selector in HTML/CSS `#<id>`."""
name: str
"""Name of the *plugin*."""
description: str
"""Short description of the *answerer*."""
preference_section: typing.Literal["general", "ui", "privacy", "query"] | None = "general"
"""Section (tab/group) in the preferences where this plugin is shown to the
user.
The value ``query`` is reserved for plugins that are activated via a
*keyword* as part of a search query, see:
- :py:obj:`PluginInfo.examples`
- :py:obj:`Plugin.keywords`
Those plugins are shown in the preferences in tab *Special Queries*.
"""
examples: list[str] = field(default_factory=list)
"""List of short examples of the usage / of query terms."""
keywords: list[str] = field(default_factory=list)
"""See :py:obj:`Plugin.keywords`"""
ID_REGXP = re.compile("[a-z][a-z0-9].*")


class Plugin(abc.ABC):
    """Abstract base class of all Plugins.

    A concrete plugin sets :py:obj:`Plugin.id`, provides its
    :py:obj:`Plugin.info` and overrides only the hooks it needs. The hooks of
    this base class keep the search and every result untouched.
    """

    id: str = ""
    """The ID (suffix) in the HTML form."""

    active: typing.ClassVar[bool]
    """Plugin is enabled/disabled by default (:py:obj:`PluginCfg.active`)."""

    keywords: list[str] = []
    """Keywords in the search query that activate the plugin.  The *keyword* is
    the first word in a search query.  If a plugin should be executed regardless
    of the search query, the list of keywords should be empty (which is also the
    default in the base class for Plugins)."""

    log: logging.Logger
    """A logger object, is automatically initialized when calling the
    constructor (if not already set in the subclass)."""

    info: PluginInfo
    """Information about the *plugin*, see :py:obj:`PluginInfo`."""

    fqn: str = ""

    def __init__(self, plg_cfg: PluginCfg) -> None:
        """Apply the configuration ``plg_cfg`` to the plugin and validate it.

        :raises NotImplementedError: if a mandatory attribute is ``None``
        :raises ValueError: if the plugin's ID does not match :py:obj:`ID_REGXP`
        """
        super().__init__()

        # unless the subclass names one, the FQN is the module of the concrete class
        if not self.fqn:
            self.fqn = type(self).__module__

        # every value of the configuration becomes an attribute of the plugin
        for cfg_name, cfg_value in plg_cfg.__dict__.items():
            setattr(self, cfg_name, cfg_value)

        # attributes the plugin implementation is obliged to provide
        for required in ("id",):
            if getattr(self, required, None) is None:
                raise NotImplementedError(f"plugin {self} is missing attribute {required}")

        if ID_REGXP.match(self.id) is None:
            raise ValueError(f"plugin ID {self.id} contains invalid character (use lowercase ASCII)")

        # a logger set by the subclass wins, otherwise create "<package>.<id>"
        if not getattr(self, "log", None):
            module = inspect.getmodule(type(self))
            pkg_name = module.__package__  # type: ignore
            self.log = logging.getLogger(f"{pkg_name}.{self.id}")

    def __hash__(self) -> int:
        """The hash value is the identity of the object (:py:obj:`id`).  It is
        used when the plugin is added to a :py:obj:`set` and when two plugins
        are compared for equality."""
        return id(self)

    def __eq__(self, other):
        """:py:obj:`Plugin` objects are equal if the hash values of the two
        objects are equal."""
        return hash(other) == hash(self)

    def init(self, app: "flask.Flask") -> bool:  # pylint: disable=unused-argument
        """Initialization of the plugin, called once when the WEB application
        is set up.  The return value decides whether the plugin stays active:

        - ``True`` plugin is active
        - ``False`` plugin is inactive

        The base method always returns ``True``, inheritances may overwrite it.
        """
        return True

    # pylint: disable=unused-argument
    def pre_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> bool:
        """Hook that runs BEFORE the search request:

        - ``True`` to continue the search
        - ``False`` to stop the search
        """
        return True

    def on_result(self, request: SXNG_Request, search: "SearchWithPlugins", result: Result) -> bool:
        """Hook that runs for each result of each engine:

        - ``True`` to keep the result
        - ``False`` to remove the result from the result list

        The ``result`` can be modified to the needs.

        .. hint::

           If :py:obj:`Result.url <searx.result_types._base.Result.url>` is modified,
           :py:obj:`Result.parsed_url <searx.result_types._base.Result.parsed_url>` must
           be changed accordingly:

           .. code:: python

              result["parsed_url"] = urlparse(result["url"])
        """
        return True

    def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | typing.Sequence[Result]:
        """Hook that runs AFTER the search request.  May return a list of
        :py:obj:`Result <searx.result_types._base.Result>` objects that are added
        to the final result list; the base method returns ``None``."""
        return None
@dataclass
class PluginCfg:
    """Settings of a plugin.

    In the settings, a plugin is addressed by the fully qualified name of its
    class and the mapping below that name provides the fields of this class:

    .. code:: yaml

       mypackage.mymodule.MyPlugin:
         active: true
    """

    active: bool = False
    """Plugin is active by default and the user can *opt-out* in the preferences."""
class PluginStorage:
    """A storage for managing the *plugins* of SearXNG."""

    plugin_list: set[Plugin]
    """The list of :py:obj:`Plugins` in this storage."""

    def __init__(self):
        self.plugin_list = set()

    def __iter__(self):
        yield from self.plugin_list

    def __len__(self):
        return len(self.plugin_list)

    @property
    def info(self) -> list[PluginInfo]:
        """The :py:obj:`PluginInfo` of every plugin in this storage."""
        return [p.info for p in self.plugin_list]

    def load_settings(self, cfg: dict[str, dict]):
        """Load plugins configured in SearXNG's settings :ref:`settings
        plugins`.

        The keys of ``cfg`` are the fully qualified names of the plugin classes
        (``<module>.<class>``), the values are the keyword arguments to build
        the :py:obj:`PluginCfg` of the plugin.

        :raises ValueError: if the class of a plugin can't be imported
        """
        for fqn, plg_settings in cfg.items():
            cls = None
            mod_name, cls_name = fqn.rsplit('.', 1)
            try:
                mod = importlib.import_module(mod_name)
                cls = getattr(mod, cls_name, None)
            except Exception as exc:  # pylint: disable=broad-exception-caught
                # log the cause of the failed import, the ValueError below
                # reports the plugin that could not be loaded
                log.exception(exc)

            if cls is None:
                msg = f"plugin {fqn} is not implemented"
                raise ValueError(msg)
            plg = cls(PluginCfg(**plg_settings))
            self.register(plg)

    def register(self, plugin: Plugin):
        """Register a :py:obj:`Plugin`.  In case of name collision (if two
        plugins have same ID) a :py:obj:`KeyError` exception is raised.
        """
        # compare the IDs: a Plugin object never equals an ID string, testing
        # the object itself against the list of IDs would never match
        if any(plugin.id == p.id for p in self.plugin_list):
            msg = f"name collision '{plugin.id}'"
            plugin.log.critical(msg)
            raise KeyError(msg)

        self.plugin_list.add(plugin)
        plugin.log.debug("plugin has been loaded")

    def init(self, app: "flask.Flask") -> None:
        """Calls the method :py:obj:`Plugin.init` of each plugin in this
        storage.  Depending on its return value, the plugin is removed from
        *this* storage or not."""
        # iterate over a copy, the set is modified while looping
        for plg in self.plugin_list.copy():
            if not plg.init(app):
                self.plugin_list.remove(plg)

    def pre_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> bool:
        """Calls :py:obj:`Plugin.pre_search` of the plugins listed in
        ``search.user_plugins`` and returns ``False`` as soon as one of them
        asks to stop the search.  A plugin that raises an exception is logged
        and skipped."""
        ret = True
        for plugin in [p for p in self.plugin_list if p.id in search.user_plugins]:
            try:
                ret = bool(plugin.pre_search(request=request, search=search))
            except Exception:  # pylint: disable=broad-except
                plugin.log.exception("Exception while calling pre_search")
                continue
            if not ret:
                # skip this search on the first False from a plugin
                break
        return ret

    def on_result(self, request: SXNG_Request, search: "SearchWithPlugins", result: Result) -> bool:
        """Calls :py:obj:`Plugin.on_result` of the plugins listed in
        ``search.user_plugins`` and returns ``False`` as soon as one of them
        asks to remove the ``result``.  A plugin that raises an exception is
        logged and skipped."""
        ret = True
        for plugin in [p for p in self.plugin_list if p.id in search.user_plugins]:
            try:
                ret = bool(plugin.on_result(request=request, search=search, result=result))
            except Exception:  # pylint: disable=broad-except
                plugin.log.exception("Exception while calling on_result")
                continue
            if not ret:
                # ignore this result item on the first False from a plugin
                break
        return ret

    def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None:
        """Extend :py:obj:`search.result_container
        <searx.results.ResultContainer>` with result items from plugins listed
        in :py:obj:`search.user_plugins <SearchWithPlugins.user_plugins>`.
        """
        # the keyword is the first word of the query (None for an empty query)
        words = search.search_query.query.split()
        keyword = words[0] if words else None

        for plugin in [p for p in self.plugin_list if p.id in search.user_plugins]:

            if plugin.keywords:
                # plugin with keywords: skip plugin if no keyword match
                if keyword and keyword not in plugin.keywords:
                    continue
            try:
                results = plugin.post_search(request=request, search=search) or []
            except Exception:  # pylint: disable=broad-except
                plugin.log.exception("Exception while calling post_search")
                continue

            # In case of *plugins* prefix ``plugin:`` is set, see searx.result_types.Result
            search.result_container.extend(f"plugin: {plugin.id}", results)

View File

@@ -0,0 +1,51 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
from __future__ import annotations
import typing
from hashlib import md5
from flask_babel import gettext
from searx.data import ahmia_blacklist_loader
from searx import get_setting
from searx.plugins import Plugin, PluginInfo
if typing.TYPE_CHECKING:
import flask
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from searx.result_types import Result
from searx.plugins import PluginCfg
ahmia_blacklist: list = []
class SXNGPlugin(Plugin):
    """Drop onion results whose hostname is listed in Ahmia's blacklist (see
    https://ahmia.fi/blacklist).  The plugin is only available when the
    instance is configured to use a Tor proxy."""

    id = "ahmia_filter"

    def __init__(self, plg_cfg: "PluginCfg") -> None:
        super().__init__(plg_cfg)
        plugin_name = gettext("Ahmia blacklist")
        plugin_description = gettext("Filter out onion results that appear in Ahmia's blacklist.")
        self.info = PluginInfo(
            id=self.id,
            name=plugin_name,
            description=plugin_description,
            preference_section="general",
        )

    def on_result(
        self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result
    ) -> bool:  # pylint: disable=unused-argument
        """Keep every result that is not an onion result with a parsed URL;
        for the others, keep the result only if the MD5 digest of its hostname
        is not in the blacklist."""
        if getattr(result, "is_onion", False) and getattr(result, "parsed_url", False):
            hostname_digest = md5(result["parsed_url"].hostname.encode()).hexdigest()
            return hostname_digest not in ahmia_blacklist
        return True

    def init(self, app: "flask.Flask") -> bool:  # pylint: disable=unused-argument
        """Load the blacklist into the module-level ``ahmia_blacklist``;
        returns ``False`` (plugin disabled) if no Tor proxy is configured."""
        global ahmia_blacklist  # pylint: disable=global-statement
        if get_setting("outgoing.using_tor_proxy"):
            ahmia_blacklist = ahmia_blacklist_loader()
            return True
        # disable the plugin
        return False

158
searx/plugins/calculator.py Normal file
View File

@@ -0,0 +1,158 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Calculate mathematical expressions using :py:obj:`ast.parse` (mode="eval").
"""
from __future__ import annotations
import typing
import ast
import re
import operator
import multiprocessing
import babel
import babel.numbers
from flask_babel import gettext
from searx.result_types import EngineResults
from searx.plugins import Plugin, PluginInfo
if typing.TYPE_CHECKING:
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from searx.plugins import PluginCfg
class SXNGPlugin(Plugin):
    """Plugin evaluates the search query as a mathematical expression.  The
    result is displayed in area for the "answers".
    """

    id = "calculator"

    def __init__(self, plg_cfg: "PluginCfg") -> None:
        super().__init__(plg_cfg)

        self.info = PluginInfo(
            id=self.id,
            name=gettext("Basic Calculator"),
            description=gettext("Calculate mathematical expressions via the search bar"),
            preference_section="general",
        )

    def post_search(self, request: "SXNG_Request", search: "SearchWithPlugins") -> EngineResults:
        """Returns an answer ``<query> = <result>`` if the query can be
        evaluated as a mathematical expression, otherwise an empty result
        list.  Numbers are parsed and formatted in the locale of the UI."""
        results = EngineResults()

        # only show the result of the expression on the first page
        if search.search_query.pageno > 1:
            return results

        query = search.search_query.query
        # in order to avoid DoS attacks with long expressions, ignore long expressions
        if len(query) > 100:
            return results

        # replace commonly used math operators with their proper Python operator
        query = query.replace("x", "*").replace(":", "/")

        # use UI language
        ui_locale = babel.Locale.parse(request.preferences.get_value("locale"), sep="-")

        # parse the number system in a localized way
        def _decimal(match: re.Match) -> str:
            # the matched number, written with the symbols of the UI locale, is
            # replaced by the string representation of the parsed decimal
            val = match.string[match.start() : match.end()]
            val = babel.numbers.parse_decimal(val, ui_locale, numbering_system="latn")
            return str(val)

        decimal = ui_locale.number_symbols["latn"]["decimal"]
        group = ui_locale.number_symbols["latn"]["group"]
        # NOTE(review): inside the character classes the ``|`` is a literal
        # character, not an alternation -- confirm that this is intended
        query = re.sub(f"[0-9]+[{decimal}|{group}][0-9]+[{decimal}|{group}]?[0-9]?", _decimal, query)

        # only numbers and math operators are accepted
        if any(str.isalpha(c) for c in query):
            return results

        # in python, powers are calculated via **
        query_py_formatted = query.replace("^", "**")

        # Prevent the runtime from being longer than 50 ms
        res = timeout_func(0.05, _eval_expr, query_py_formatted)
        # no value or an empty string from the evaluation: there is no answer
        if res is None or res == "":
            return results

        res = babel.numbers.format_decimal(res, locale=ui_locale)
        # the answer shows the query as the user typed it
        results.add(results.types.Answer(answer=f"{search.search_query.query} = {res}"))

        return results
operators: dict[type, typing.Callable] = {
ast.Add: operator.add,
ast.Sub: operator.sub,
ast.Mult: operator.mul,
ast.Div: operator.truediv,
ast.Pow: operator.pow,
ast.BitXor: operator.xor,
ast.USub: operator.neg,
}
# with multiprocessing.get_context("fork") we are ready for Py3.14 (by emulating
# the old behavior "fork") but it will not solve the core problem of fork, nor
# will it remove the deprecation warnings in py3.12 & py3.13. Issue is
# ddiscussed here: https://github.com/searxng/searxng/issues/4159
mp_fork = multiprocessing.get_context("fork")
def _eval_expr(expr):
"""
>>> _eval_expr('2^6')
64
>>> _eval_expr('2**6')
64
>>> _eval_expr('1 + 2*3**(4^5) / (6 + -7)')
-5.0
"""
try:
return _eval(ast.parse(expr, mode='eval').body)
except ZeroDivisionError:
# This is undefined
return ""
def _eval(node):
if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
return node.value
if isinstance(node, ast.BinOp):
return operators[type(node.op)](_eval(node.left), _eval(node.right))
if isinstance(node, ast.UnaryOp):
return operators[type(node.op)](_eval(node.operand))
raise TypeError(node)
def handler(q: multiprocessing.Queue, func, args, **kwargs):  # pylint:disable=invalid-name
    """Call ``func(*args, **kwargs)`` and put its return value into the queue
    ``q``.  If anything is raised on the way, ``None`` is put into the queue
    and the exception is re-raised."""
    try:
        outcome = func(*args, **kwargs)
        q.put(outcome)
    except BaseException:
        q.put(None)
        raise
def timeout_func(timeout, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` in a forked process and wait at most
    ``timeout`` seconds for it.

    Returns the value that :py:obj:`handler` has put into the queue (the return
    value of ``func``, or ``None`` if ``func`` raised an exception).  If the
    process is still running after ``timeout`` seconds, it is terminated and
    ``None`` is returned.
    """
    # local import: no logger object is defined in this module, referencing an
    # undefined name in the timeout branch would raise a NameError before the
    # child process is terminated and cleaned up
    import logging  # pylint: disable=import-outside-toplevel

    que = mp_fork.Queue()
    p = mp_fork.Process(target=handler, args=(que, func, args), kwargs=kwargs)
    p.start()
    p.join(timeout=timeout)
    ret_val = None
    if not p.is_alive():
        ret_val = que.get()
    else:
        logging.getLogger(__name__).debug("terminate function after timeout is exceeded")
        p.terminate()
    # wait for the (possibly terminated) process before releasing its resources
    p.join()
    p.close()
    return ret_val

View File

@@ -0,0 +1,68 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, missing-class-docstring
from __future__ import annotations
import typing
import re
import hashlib
from flask_babel import gettext
from searx.plugins import Plugin, PluginInfo
from searx.result_types import EngineResults
if typing.TYPE_CHECKING:
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from searx.plugins import PluginCfg
class SXNGPlugin(Plugin):
    """Plugin that answers queries of the form ``<hash function> <string>``
    with the hex digest of the string.  The digest is displayed in area for
    the "answers".
    """

    id = "hash_plugin"
    keywords = ["md5", "sha1", "sha224", "sha256", "sha384", "sha512"]

    def __init__(self, plg_cfg: "PluginCfg") -> None:
        super().__init__(plg_cfg)

        # "<keyword> <rest of the query>", the keyword is matched case-insensitively
        alternatives = "|".join(self.keywords)
        self.parser_re = re.compile(f"({alternatives}) (.*)", re.I)

        plugin_name = gettext("Hash plugin")
        plugin_description = gettext(
            "Converts strings to different hash digests. Available functions: md5, sha1, sha224, sha256, sha384, sha512."  # pylint:disable=line-too-long
        )
        self.info = PluginInfo(
            id=self.id,
            name=plugin_name,
            description=plugin_description,
            examples=["sha512 The quick brown fox jumps over the lazy dog"],
            preference_section="query",
        )

    def post_search(self, request: "SXNG_Request", search: "SearchWithPlugins") -> EngineResults:
        """Returns a result list only for the first page."""
        results = EngineResults()

        if search.search_query.pageno > 1:
            return results

        parsed = self.parser_re.match(search.search_query.query)
        if parsed is None:
            # the query is not "<hash function> <string>"
            return results

        function = parsed.group(1)
        string = parsed.group(2)
        if not string.strip():
            # nothing to hash
            return results

        # hash the UTF-8 encoded string with the selected function
        hasher = hashlib.new(function.lower())
        hasher.update(string.encode("utf-8").strip())

        label = gettext("hash digest")
        digest = hasher.hexdigest()
        results.add(results.types.Answer(answer=f"{function} {label}: {digest}"))

        return results

204
searx/plugins/hostnames.py Normal file
View File

@@ -0,0 +1,204 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=too-many-branches, unused-argument
"""
During the initialization phase, the plugin checks whether a ``hostnames:``
configuration exists. If this is not the case, the plugin is not included
in the PluginStorage (it is not available for selection).
- ``hostnames.replace``: A **mapping** of regular expressions to hostnames to be
replaced by other hostnames.
.. code:: yaml
hostnames:
replace:
'(.*\\.)?youtube\\.com$': 'invidious.example.com'
'(.*\\.)?youtu\\.be$': 'invidious.example.com'
...
- ``hostnames.remove``: A **list** of regular expressions of the hostnames whose
results should be removed from the results list.
.. code:: yaml
hostnames:
remove:
- '(.*\\.)?facebook.com$'
- ...
- ``hostnames.high_priority``: A **list** of regular expressions for hostnames
whose result should be given higher priority. The results from these hosts are
arranged higher in the results list.
.. code:: yaml
hostnames:
high_priority:
- '(.*\\.)?wikipedia.org$'
- ...
- ``hostnames.low_priority``: A **list** of regular expressions for hostnames
whose result should be given lower priority. The results from these hosts are
arranged lower in the results list.
.. code:: yaml
hostnames:
low_priority:
- '(.*\\.)?google(\\..*)?$'
- ...
If the URL matches the pattern of ``high_priority`` AND ``low_priority``, the
higher priority wins over the lower priority.
Alternatively, you can also specify a file name for the **mappings** or
**lists** to load these from an external file:
.. code:: yaml
hostnames:
replace: 'rewrite-hosts.yml'
remove:
- '(.*\\.)?facebook.com$'
- ...
low_priority:
- '(.*\\.)?google(\\..*)?$'
- ...
high_priority:
- '(.*\\.)?wikipedia.org$'
- ...
The ``rewrite-hosts.yml`` from the example above must be in the folder in which
the ``settings.yml`` file is already located (``/etc/searxng``). The file then
only contains the lists or the mapping tables without further information on the
namespaces. In the example above, this would be a mapping table that looks
something like this:
.. code:: yaml
'(.*\\.)?youtube\\.com$': 'invidious.example.com'
'(.*\\.)?youtu\\.be$': 'invidious.example.com'
"""
from __future__ import annotations
import typing
import re
from urllib.parse import urlunparse, urlparse
from flask_babel import gettext
from searx import settings
from searx.result_types._base import MainResult, LegacyResult
from searx.settings_loader import get_yaml_cfg
from searx.plugins import Plugin, PluginInfo
from ._core import log
if typing.TYPE_CHECKING:
import flask
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from searx.result_types import Result
from searx.plugins import PluginCfg
REPLACE: dict[re.Pattern, str] = {}
REMOVE: set = set()
HIGH: set = set()
LOW: set = set()
class SXNGPlugin(Plugin):
    """Rewrite hostnames, remove results or prioritize them, driven by the
    regular expressions of the ``hostnames:`` settings."""

    id = "hostnames"

    def __init__(self, plg_cfg: "PluginCfg") -> None:
        super().__init__(plg_cfg)
        plugin_name = gettext("Hostnames plugin")
        plugin_description = gettext("Rewrite hostnames and remove or prioritize results based on the hostname")
        self.info = PluginInfo(
            id=self.id,
            name=plugin_name,
            description=plugin_description,
            preference_section="general",
        )

    def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result) -> bool:
        """Returns ``False`` if the host of the result matches a *remove*
        pattern.  Otherwise the URL fields are filtered / rewritten and the
        priority of the result is set (*high* wins over *low*)."""

        # if the link (parsed_url) of the result matches, the result is removed
        # from the result list, in any other case the result remains in the
        # list / see final "return True" below.
        if any(result.parsed_url and pattern.search(result.parsed_url.netloc) for pattern in REMOVE):
            return False

        result.filter_urls(filter_url_field)

        if isinstance(result, (MainResult, LegacyResult)):
            # "high" is evaluated last, it overrides a "low" set before
            for priority, patterns in (("low", LOW), ("high", HIGH)):
                if any(result.parsed_url and pattern.search(result.parsed_url.netloc) for pattern in patterns):
                    result.priority = priority  # type: ignore

        return True

    def init(self, app: "flask.Flask") -> bool:  # pylint: disable=unused-argument
        """Compile the regular expressions of the settings into the
        module-level tables; returns ``False`` (plugin removed) if there is no
        ``hostnames:`` setting."""
        global REPLACE, REMOVE, HIGH, LOW  # pylint: disable=global-statement

        if settings.get(self.id):
            REPLACE = self._load_regular_expressions("replace") or {}  # type: ignore
            REMOVE = self._load_regular_expressions("remove") or set()  # type: ignore
            HIGH = self._load_regular_expressions("high_priority") or set()  # type: ignore
            LOW = self._load_regular_expressions("low_priority") or set()  # type: ignore
            return True

        # Remove plugin, if there isn't a "hostnames:" setting
        return False

    def _load_regular_expressions(self, settings_key) -> dict[re.Pattern, str] | set | None:
        """Returns the compiled patterns of ``settings_key``: a set for a list
        of patterns, a mapping *pattern --> replacement* for a dictionary and
        ``None`` if there is nothing (usable) configured."""
        configured = settings.get(self.id, {}).get(settings_key)
        if not configured:
            return None

        # a string is the name of an external file with the configuration
        if isinstance(configured, str):
            configured = get_yaml_cfg(configured)

        if isinstance(configured, dict):
            return {re.compile(regexp): replacement for (regexp, replacement) in configured.items()}

        if isinstance(configured, list):
            return set(map(re.compile, configured))

        return None
def filter_url_field(result: "Result|LegacyResult", field_name: str, url_src: str) -> bool | str:
    """Filter function for the URL fields of a result.

    Returns ``True`` to leave the URL unchanged and ``False`` to ignore the
    URL (its host matches a *remove* pattern).  If the host matches a
    *replace* pattern, the returned string is the rewritten URL to use."""

    if not url_src:
        log.debug("missing a URL in field %s", field_name)
        return True

    parts = urlparse(url=url_src)
    host = parts.netloc

    if any(pattern.search(host) for pattern in REMOVE):
        return False

    # the first matching pattern decides about the replacement
    for pattern, replacement in REPLACE.items():
        if pattern.search(host):
            rewritten = parts._replace(netloc=pattern.sub(replacement, host))
            return urlunparse(rewritten)

    return True

View File

@@ -0,0 +1,90 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
from __future__ import annotations
import typing
import re
from urllib.parse import parse_qsl
from flask_babel import gettext
from searx import get_setting
from searx.plugins import Plugin, PluginInfo
from searx.extended_types import sxng_request
from ._core import log
if typing.TYPE_CHECKING:
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from searx.result_types import Result, LegacyResult
from searx.plugins import PluginCfg
ahmia_blacklist: list = []
def filter_url_field(result: "Result|LegacyResult", field_name: str, url_src: str) -> bool | str:
    """Filter function for the URL fields of a result, only the field ``url``
    is considered.

    Returns ``True`` to leave the URL unchanged.  If a DOI (shorter than 50
    characters) is found in the parsed URL of the result, the returned string
    is the URL of the DOI at the selected resolver; the DOI is also stored in
    the result, if the result has no ``doi`` yet."""
    if field_name != "url":
        return True  # use it unchanged

    doi = extract_doi(result.parsed_url)
    if not doi or len(doi) >= 50:
        return True  # use it unchanged

    # strip trailing parts that do not belong to the DOI, one after the other
    for trailer in ("/", ".pdf", ".xml", "/full", "/meta", "/abstract"):
        doi = doi.removesuffix(trailer)

    new_url = get_doi_resolver() + doi
    if "doi" not in result:
        result["doi"] = doi
    log.debug("oa_doi_rewrite: [URL field: %s] %s -> %s", field_name, url_src, new_url)
    return new_url  # use new url
class SXNGPlugin(Plugin):
    """Avoid paywalls by redirecting to open-access: result URLs that contain
    a DOI are rewritten to the URL of the DOI at a DOI resolver."""

    id = "oa_doi_rewrite"

    def __init__(self, plg_cfg: "PluginCfg") -> None:
        super().__init__(plg_cfg)
        plugin_name = gettext("Open Access DOI rewrite")
        plugin_description = gettext(
            "Avoid paywalls by redirecting to open-access versions of publications when available"
        )
        self.info = PluginInfo(
            id=self.id,
            name=plugin_name,
            description=plugin_description,
            preference_section="general",
        )

    def on_result(
        self,
        request: "SXNG_Request",
        search: "SearchWithPlugins",
        result: "Result",
    ) -> bool:  # pylint: disable=unused-argument
        """Apply the URL filter to results that have a parsed URL; the result
        is always kept."""
        has_parsed_url = bool(result.parsed_url)
        if has_parsed_url:
            result.filter_urls(filter_url_field)
        return True
regex = re.compile(r'10\.\d{4,9}/[^\s]+')


def extract_doi(url):
    """Returns the first DOI found in the parsed ``url``: the path is searched
    first, then the values of the query arguments (in their order).  Returns
    ``None`` if no DOI is found."""
    found = regex.search(url.path)
    if found is None:
        for _name, value in parse_qsl(url.query):
            found = regex.search(value)
            if found is not None:
                break
    return None if found is None else found.group(0)
def get_doi_resolver() -> str:
    """Returns the URL of the DOI resolver selected in the preferences of the
    current request; if the selected resolver is not one of the configured
    ``doi_resolvers``, the ``default_doi_resolver`` is used."""
    resolvers = get_setting("doi_resolvers")
    chosen = sxng_request.preferences.get_value('doi_resolver')[0]
    resolver_name = chosen if chosen in resolvers else get_setting("default_doi_resolver")
    return resolvers[resolver_name]

View File

@@ -0,0 +1,57 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, missing-class-docstring
from __future__ import annotations
import typing
import re
from flask_babel import gettext
from searx.botdetection._helpers import get_real_ip
from searx.result_types import EngineResults
from . import Plugin, PluginInfo
if typing.TYPE_CHECKING:
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from . import PluginCfg
class SXNGPlugin(Plugin):
    """Plugin that tells the users something about their own request: the IP
    for queries starting with ``ip`` and the HTTP User-Agent for queries
    starting with ``user-agent``.  The information is displayed in area for
    the "answers".
    """

    id = "self_info"
    keywords = ["ip", "user-agent"]

    def __init__(self, plg_cfg: "PluginCfg"):
        super().__init__(plg_cfg)

        # both patterns are anchored at the start of the query
        self.ip_regex = re.compile(r"^ip", re.IGNORECASE)
        self.ua_regex = re.compile(r"^user-agent", re.IGNORECASE)

        plugin_name = gettext("Self Information")
        plugin_description = gettext(
            """Displays your IP if the query is "ip" and your user agent if the query is "user-agent"."""
        )
        self.info = PluginInfo(
            id=self.id,
            name=plugin_name,
            description=plugin_description,
            preference_section="query",
        )

    def post_search(self, request: "SXNG_Request", search: "SearchWithPlugins") -> EngineResults:
        """Returns a result list only for the first page."""
        results = EngineResults()

        if search.search_query.pageno > 1:
            return results

        if self.ip_regex.search(search.search_query.query):
            ip_answer = gettext("Your IP is: ") + get_real_ip(request)
            results.add(results.types.Answer(answer=ip_answer))

        if self.ua_regex.match(search.search_query.query):
            ua_answer = gettext("Your user-agent is: ") + str(request.user_agent)
            results.add(results.types.Answer(answer=ua_answer))

        return results

View File

@@ -0,0 +1,79 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""A plugin to check if the ip address of the request is a Tor exit-node if the
user searches for ``tor-check``. It fetches the tor exit node list from
:py:obj:`url_exit_list` and parses all the IPs into a list, then checks if the
user's IP address is in it.
"""
from __future__ import annotations
import typing
import re
from flask_babel import gettext
from httpx import HTTPError
from searx.network import get
from searx.plugins import Plugin, PluginInfo
from searx.result_types import EngineResults
from searx.botdetection import get_real_ip
if typing.TYPE_CHECKING:
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from searx.plugins import PluginCfg
# Regex for exit node addresses in the list.
reg = re.compile(r"(?<=ExitAddress )\S+")
url_exit_list = "https://check.torproject.org/exit-addresses"
"""URL to load Tor exit list from."""
class SXNGPlugin(Plugin):
    """Check if the IP address of the request is a Tor exit-node and tell the
    user about it in the area for the "answers"."""

    id = "tor_check"
    keywords = ["tor-check", "tor_check", "torcheck", "tor", "tor check"]

    def __init__(self, plg_cfg: "PluginCfg") -> None:
        super().__init__(plg_cfg)
        plugin_name = gettext("Tor check plugin")
        plugin_description = gettext(
            "This plugin checks if the address of the request is a Tor exit-node, and"
            " informs the user if it is; like check.torproject.org, but from SearXNG."
        )
        self.info = PluginInfo(
            id=self.id,
            name=plugin_name,
            description=plugin_description,
            preference_section="query",
        )

    def post_search(self, request: "SXNG_Request", search: "SearchWithPlugins") -> EngineResults:
        """Returns an answer only on the first page and only if the whole
        query (in lowercase) is one of the keywords of this plugin."""
        results = EngineResults()

        if search.search_query.pageno > 1:
            return results

        if search.search_query.query.lower() not in self.keywords:
            return results

        # Request the list of tor exit nodes.
        try:
            resp = get(url_exit_list)
            node_list = reg.findall(resp.text)  # type: ignore
        except HTTPError:
            # No answer, return error
            msg = gettext("Could not download the list of Tor exit-nodes from")
            results.add(results.types.Answer(answer=f"{msg} {url_exit_list}"))
            return results

        real_ip = get_real_ip(request)

        if real_ip in node_list:
            msg = gettext("You are using Tor and it looks like you have the external IP address")
        else:
            msg = gettext("You are not using Tor and you have the external IP address")
        results.add(results.types.Answer(answer=f"{msg} {real_ip}"))

        return results

View File

@@ -0,0 +1,54 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, unused-argument
from __future__ import annotations
import logging
import typing
from flask_babel import gettext
from searx.data import TRACKER_PATTERNS
from . import Plugin, PluginInfo
if typing.TYPE_CHECKING:
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from searx.result_types import Result, LegacyResult
from searx.plugins import PluginCfg
# Module-level logger; ``SXNGPlugin.filter_url_field`` uses it for debug output.
log = logging.getLogger("searx.plugins.tracker_url_remover")
class SXNGPlugin(Plugin):
    """Strip tracker arguments from the URLs of each result."""

    id = "tracker_url_remover"

    def __init__(self, plg_cfg: "PluginCfg") -> None:
        super().__init__(plg_cfg)
        plugin_name = gettext("Tracker URL remover")
        plugin_description = gettext("Remove trackers arguments from the returned URL")
        self.info = PluginInfo(
            id=self.id,
            name=plugin_name,
            description=plugin_description,
            preference_section="privacy",
        )

    def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result) -> bool:
        """Run :py:obj:`filter_url_field` over the URL fields of ``result``.

        Always returns ``True``.
        """
        url_filter = self.filter_url_field
        result.filter_urls(url_filter)
        return True

    @classmethod
    def filter_url_field(cls, result: "Result|LegacyResult", field_name: str, url_src: str) -> bool | str:
        """URL filter callback passed to ``result.filter_urls``.

        For a non-empty ``url_src`` the return value is whatever
        ``TRACKER_PATTERNS.clean_url`` returns.  A missing URL is logged at
        debug level and ``True`` is returned."""
        if url_src:
            return TRACKER_PATTERNS.clean_url(url=url_src)

        log.debug("missing a URL in field %s", field_name)
        return True

View File

@@ -0,0 +1,157 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""A plugin for converting measured values from one unit to another unit (a
unit converter).
The plugin looks up the symbols (given in the query term) in a list of
converters, each converter is one item in the list (compare
:py:obj:`ADDITIONAL_UNITS`). If the symbols are ambiguous, the matching units
of measurement are evaluated. The weighting in the evaluation results from the
sorting of the :py:obj:`list of unit converters<symbol_to_si>`.
"""
from __future__ import annotations
import typing
import re
import babel.numbers
from flask_babel import gettext, get_locale
from searx.wikidata_units import symbol_to_si
from searx.plugins import Plugin, PluginInfo
from searx.result_types import EngineResults
if typing.TYPE_CHECKING:
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from searx.plugins import PluginCfg
# NOTE(review): the four empty values below are not referenced anywhere in the
# visible part of this module; presumably leftovers of an older module-level
# plugin interface -- confirm before removing.
name = ""
description = gettext("")
plugin_id = ""
preference_section = ""
# Words that separate the source measure from the target unit in a query;
# ``SXNGPlugin.post_search`` compares each query word with these keywords.
CONVERT_KEYWORDS = ["in", "to", "as"]
class SXNGPlugin(Plugin):
    """Convert between units.  The result is displayed in area for the
    "answers".
    """

    id = "unit_converter"

    def __init__(self, plg_cfg: "PluginCfg") -> None:
        super().__init__(plg_cfg)
        self.info = PluginInfo(
            id=self.id,
            name=gettext("Unit converter plugin"),
            description=gettext("Convert between units"),
            preference_section="general",
        )

    def post_search(self, request: "SXNG_Request", search: "SearchWithPlugins") -> EngineResults:
        """Add an answer with the converted value if the query is a conversion.

        The query is split into words (separated by a single space).  Each word
        that equals one of the :py:obj:`CONVERT_KEYWORDS` is tried as the
        separator: the words before it form the source measure, the words after
        it the target unit.  Every split that can be converted adds one answer.

        Nothing is added for result pages after the first one or for queries
        with fewer than three words.
        """
        results = EngineResults()

        # only convert between units on the first page
        if search.search_query.pageno > 1:
            return results

        query_parts = search.search_query.query.split(" ")
        if len(query_parts) < 3:
            return results

        for idx, query_part in enumerate(query_parts):
            if query_part not in CONVERT_KEYWORDS:
                continue
            # Split at the position of the matched *word*.  Splitting the raw
            # query string on the keyword would cut at its first occurrence as a
            # substring, e.g. inside the unit of "3 ton to kg" or "5 min in s".
            from_query = " ".join(query_parts[:idx]).strip()
            to_query = " ".join(query_parts[idx + 1 :]).strip()
            target_val = _parse_text_and_convert(from_query, to_query)
            if target_val:
                results.add(results.types.Answer(answer=target_val))

        return results
# inspired from https://stackoverflow.com/a/42475086
RE_MEASURE = r'''
(?P<sign>[-+]?) # +/- or nothing for positive
(\s*) # separator: white space or nothing
(?P<number>[\d\.,]*) # number: 1,000.00 (en) or 1.000,00 (de)
(?P<E>[eE][-+]?\d+)? # scientific notation: e(+/-)2 (*10^2)
(\s*) # separator: white space or nothing
(?P<unit>\S+) # unit of measure
'''
def _parse_text_and_convert(from_query, to_query) -> str | None:
# pylint: disable=too-many-branches, too-many-locals
if not (from_query and to_query):
return None
measured = re.match(RE_MEASURE, from_query, re.VERBOSE)
if not (measured and measured.group('number'), measured.group('unit')):
return None
# Symbols are not unique, if there are several hits for the from-unit, then
# the correct one must be determined by comparing it with the to-unit
# https://github.com/searxng/searxng/pull/3378#issuecomment-2080974863
# first: collecting possible units
source_list, target_list = [], []
for symbol, si_name, from_si, to_si, orig_symbol in symbol_to_si():
if symbol == measured.group('unit'):
source_list.append((si_name, to_si))
if symbol == to_query:
target_list.append((si_name, from_si, orig_symbol))
if not (source_list and target_list):
return None
source_to_si = target_from_si = target_symbol = None
# second: find the right unit by comparing list of from-units with list of to-units
for source in source_list:
for target in target_list:
if source[0] == target[0]: # compare si_name
source_to_si = source[1]
target_from_si = target[1]
target_symbol = target[2]
if not (source_to_si and target_from_si):
return None
_locale = get_locale() or 'en_US'
value = measured.group('sign') + measured.group('number') + (measured.group('E') or '')
value = babel.numbers.parse_decimal(value, locale=_locale)
# convert value to SI unit
if isinstance(source_to_si, (float, int)):
value = float(value) * source_to_si
else:
value = source_to_si(float(value))
# convert value from SI unit to target unit
if isinstance(target_from_si, (float, int)):
value = float(value) * target_from_si
else:
value = target_from_si(float(value))
if measured.group('E'):
# when incoming notation is scientific, outgoing notation is scientific
result = babel.numbers.format_scientific(value, locale=_locale)
else:
result = babel.numbers.format_decimal(value, locale=_locale, format='#,##0.##########;-#')
return f'{result} {target_symbol}'