first commit
This commit is contained in:
82
searx/search/processors/__init__.py
Normal file
82
searx/search/processors/__init__.py
Normal file
@@ -0,0 +1,82 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
"""Implement request processors used by engine-types.
|
||||
|
||||
"""
|
||||
|
||||
__all__ = [
|
||||
'EngineProcessor',
|
||||
'OfflineProcessor',
|
||||
'OnlineProcessor',
|
||||
'OnlineDictionaryProcessor',
|
||||
'OnlineCurrencyProcessor',
|
||||
'OnlineUrlSearchProcessor',
|
||||
'PROCESSORS',
|
||||
]
|
||||
|
||||
import threading
|
||||
from typing import Dict
|
||||
|
||||
from searx import logger
|
||||
from searx import engines
|
||||
|
||||
from .online import OnlineProcessor
|
||||
from .offline import OfflineProcessor
|
||||
from .online_dictionary import OnlineDictionaryProcessor
|
||||
from .online_currency import OnlineCurrencyProcessor
|
||||
from .online_url_search import OnlineUrlSearchProcessor
|
||||
from .abstract import EngineProcessor
|
||||
|
||||
logger = logger.getChild('search.processors')
|
||||
PROCESSORS: Dict[str, EngineProcessor] = {}
|
||||
"""Cache request processors, stored by *engine-name* (:py:func:`initialize`)
|
||||
|
||||
:meta hide-value:
|
||||
"""
|
||||
|
||||
|
||||
def get_processor_class(engine_type):
    """Look up the processor class whose ``engine_type`` equals the argument.

    The known processor classes are checked in a fixed order and the first
    one whose ``engine_type`` class attribute compares equal is returned.

    :param engine_type: engine-type identifier to look up.
    :return: the matching processor class, or ``None`` when no class matches.
    """
    known_processors = (
        OnlineProcessor,
        OfflineProcessor,
        OnlineDictionaryProcessor,
        OnlineCurrencyProcessor,
        OnlineUrlSearchProcessor,
    )
    matching = (cls for cls in known_processors if cls.engine_type == engine_type)
    return next(matching, None)
|
||||
|
||||
|
||||
def get_processor(engine, engine_name):
    """Create the processor instance that fits the engine's ``engine_type``.

    An engine without an ``engine_type`` attribute is treated as ``'online'``.

    :param engine: engine object the processor will drive.
    :param engine_name: name of the engine, passed on to the processor.
    :return: a new processor instance, or ``None`` when no processor class is
        available for the engine's type.
    """
    processor_class = get_processor_class(getattr(engine, 'engine_type', 'online'))
    if not processor_class:
        return None
    return processor_class(engine, engine_name)
|
||||
|
||||
|
||||
def initialize_processor(processor):
    """Start the initialization of a single processor.

    When the processor reports ``has_initialize_function``, its
    ``initialize`` method is run in a daemon thread that is started here and
    not joined.  Otherwise nothing happens.

    :param processor: processor whose engine should be initialized.
    """
    if not processor.has_initialize_function:
        return
    init_thread = threading.Thread(target=processor.initialize, daemon=True)
    init_thread.start()
|
||||
|
||||
|
||||
def initialize(engine_list):
    """Initialize all engines and store a processor for each engine in :py:obj:`PROCESSORS`.

    Entries whose name is not found in ``engines.engines`` are skipped.  When
    no processor can be built for an engine (``get_processor`` returned
    ``None``), an error is logged on the engine's logger and the engine gets
    no entry in :py:obj:`PROCESSORS`.

    :param engine_list: iterable of mappings, each with a ``'name'`` key.
    """
    for engine_data in engine_list:
        engine_name = engine_data['name']
        engine = engines.engines.get(engine_name)
        if not engine:
            continue

        processor = get_processor(engine, engine_name)
        if processor is None:
            # Check for None *before* touching the processor: initializing a
            # missing processor would raise AttributeError and abort the loop
            # instead of reporting the problem.
            engine.logger.error('Error get processor for engine %s', engine_name)
            continue

        initialize_processor(processor)
        PROCESSORS[engine_name] = processor
|
||||
195
searx/search/processors/abstract.py
Normal file
195
searx/search/processors/abstract.py
Normal file
@@ -0,0 +1,195 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Abstract base classes for engine request processors.
|
||||
|
||||
"""
|
||||
|
||||
import threading
|
||||
from abc import abstractmethod, ABC
|
||||
from timeit import default_timer
|
||||
from typing import Dict, Union
|
||||
|
||||
from searx import settings, logger
|
||||
from searx.engines import engines
|
||||
from searx.network import get_time_for_thread, get_network
|
||||
from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
|
||||
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
|
||||
from searx.utils import get_engine_from_settings
|
||||
|
||||
logger = logger.getChild('searx.search.processor')
|
||||
SUSPENDED_STATUS: Dict[Union[int, str], 'SuspendedStatus'] = {}
|
||||
|
||||
|
||||
class SuspendedStatus:
    """Hold the suspend state: end time, reason and consecutive error count."""

    __slots__ = ('suspend_end_time', 'suspend_reason', 'continuous_errors', 'lock')

    def __init__(self):
        # The lock guards the updates done in suspend() and resume().
        self.lock = threading.Lock()
        self.continuous_errors = 0
        self.suspend_end_time = 0
        self.suspend_reason = None

    @property
    def is_suspended(self):
        """``True`` while the suspend end time has not been passed yet."""
        return default_timer() <= self.suspend_end_time

    def suspend(self, suspended_time, suspend_reason):
        """Record one more error and suspend for ``suspended_time`` seconds.

        :param suspended_time: duration in seconds; when ``None`` it is
            ``continuous_errors * ban_time_on_fail`` capped at
            ``max_ban_time_on_fail`` (both read from ``settings['search']``).
        :param suspend_reason: reason stored in ``suspend_reason``.
        """
        with self.lock:
            self.continuous_errors += 1
            if suspended_time is None:
                # no explicit duration: grow with the number of errors in a row
                suspended_time = min(
                    settings['search']['max_ban_time_on_fail'],
                    self.continuous_errors * settings['search']['ban_time_on_fail'],
                )
            self.suspend_end_time = default_timer() + suspended_time
            self.suspend_reason = suspend_reason
            logger.debug('Suspend for %i seconds', suspended_time)

    def resume(self):
        """Clear the suspend state and the consecutive error counter."""
        with self.lock:
            self.continuous_errors, self.suspend_end_time, self.suspend_reason = 0, 0, None
|
||||
|
||||
|
||||
class EngineProcessor(ABC):
    """Base class used for all types of request processors.

    A processor is bound to one engine.  It builds the request parameters,
    feeds results, timings and errors into the result container and the
    metrics, and tracks the suspend state of the engine.
    """

    __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger'

    def __init__(self, engine, engine_name: str):
        """Bind the processor to ``engine`` and attach its suspend state.

        :param engine: engine object driven by this processor.
        :param engine_name: name of the engine; used to look up the engine's
            logger and network.
        """
        self.engine = engine
        self.engine_name = engine_name
        self.logger = engines[engine_name].logger
        # The suspend state is keyed by the network object when there is one,
        # otherwise by the engine name; processors with the same key share
        # one SuspendedStatus.
        key = get_network(self.engine_name)
        key = id(key) if key else self.engine_name
        self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())

    def initialize(self):
        """Call the engine's ``init`` function with the engine's settings.

        Exceptions are logged on the engine's logger and not re-raised.
        """
        try:
            self.engine.init(get_engine_from_settings(self.engine_name))
        except SearxEngineResponseException as exc:
            self.logger.warning('Fail to initialize // %s', exc)
        except Exception:  # pylint: disable=broad-except
            self.logger.exception('Fail to initialize')
        else:
            self.logger.debug('Initialized')

    @property
    def has_initialize_function(self):
        """``True`` when the engine has an ``init`` attribute."""
        return hasattr(self.engine, 'init')

    def handle_exception(self, result_container, exception_or_message, suspend=False):
        """Report an engine error and optionally suspend the engine.

        :param result_container: container that receives the unresponsive
            engine entry.
        :param exception_or_message: exception instance or plain error message.
        :param suspend: when true, the suspend state is updated.  The duration
            is taken from ``suspended_time`` of a
            ``SearxEngineAccessDeniedException``; for anything else ``None``
            is passed and the suspend state picks the duration.
        """
        # update result_container
        if isinstance(exception_or_message, BaseException):
            # report the qualified class name, without the "builtins." prefix
            exception_class = exception_or_message.__class__
            module_name = getattr(exception_class, '__module__', 'builtins')
            module_name = '' if module_name == 'builtins' else module_name + '.'
            error_message = module_name + exception_class.__qualname__
        else:
            error_message = exception_or_message
        result_container.add_unresponsive_engine(self.engine_name, error_message)
        # metrics
        counter_inc('engine', self.engine_name, 'search', 'count', 'error')
        if isinstance(exception_or_message, BaseException):
            count_exception(self.engine_name, exception_or_message)
        else:
            count_error(self.engine_name, exception_or_message)
        # suspend the engine ?
        if suspend:
            suspended_time = None
            if isinstance(exception_or_message, SearxEngineAccessDeniedException):
                suspended_time = exception_or_message.suspended_time
            self.suspended_status.suspend(suspended_time, error_message)  # pylint: disable=no-member

    def _extend_container_basic(self, result_container, start_time, search_results):
        """Store results and timings in the container and update the metrics."""
        # update result_container
        result_container.extend(self.engine_name, search_results)
        engine_time = default_timer() - start_time
        page_load_time = get_time_for_thread()
        result_container.add_timing(self.engine_name, engine_time, page_load_time)
        # metrics
        counter_inc('engine', self.engine_name, 'search', 'count', 'successful')
        histogram_observe(engine_time, 'engine', self.engine_name, 'time', 'total')
        if page_load_time is not None:
            histogram_observe(page_load_time, 'engine', self.engine_name, 'time', 'http')

    def extend_container(self, result_container, start_time, search_results):
        """Add ``search_results`` to the container unless the thread timed out.

        When the current thread carries a true ``_timeout`` attribute a
        ``'timeout'`` error is reported instead.  Otherwise results that are
        not ``None`` are stored and the suspend state is reset.
        """
        if getattr(threading.current_thread(), '_timeout', False):
            # the main thread is not waiting anymore
            self.handle_exception(result_container, 'timeout', None)
        else:
            # check if the engine accepted the request
            if search_results is not None:
                self._extend_container_basic(result_container, start_time, search_results)
            self.suspended_status.resume()

    def extend_container_if_suspended(self, result_container):
        """Report the engine as suspended when it currently is.

        :return: ``True`` when the engine is suspended (and was reported to
            the container), ``False`` otherwise.
        """
        if self.suspended_status.is_suspended:
            result_container.add_unresponsive_engine(
                self.engine_name, self.suspended_status.suspend_reason, suspended=True
            )
            return True
        return False

    def get_params(self, search_query, engine_category):
        """Returns a set of (see :ref:`request params <engine request arguments>`) or
        ``None`` if request is not supported.

        Not supported conditions (``None`` is returned):

        - A page-number > 1 when engine does not support paging.
        - A page-number above the engine's ``max_page`` (or, when that is
          falsy, above ``settings['search']['max_page']``).
        - A time range when the engine does not support time range.
        """
        # if paging is not supported, skip
        if search_query.pageno > 1 and not self.engine.paging:
            return None

        # if max page is reached, skip
        max_page = self.engine.max_page or settings['search']['max_page']
        if max_page and max_page < search_query.pageno:
            return None

        # if time_range is not supported, skip
        if search_query.time_range and not self.engine.time_range_support:
            return None

        params = {}
        params["query"] = search_query.query
        params['category'] = engine_category
        params['pageno'] = search_query.pageno
        params['safesearch'] = search_query.safesearch
        params['time_range'] = search_query.time_range
        params['engine_data'] = search_query.engine_data.get(self.engine_name, {})
        params['searxng_locale'] = search_query.lang

        # deprecated / vintage --> use params['searxng_locale']
        #
        # Conditions related to engine's traits are implemented in engine.traits
        # module.  Don't do 'locale' decisions here in the abstract layer of the
        # search processor, just pass the value from user's choice unchanged to
        # the engine request.

        if hasattr(self.engine, 'language') and self.engine.language:
            params['language'] = self.engine.language
        else:
            params['language'] = search_query.lang

        return params

    @abstractmethod
    def search(self, query, params, result_container, start_time, timeout_limit):
        """Run the search; implemented by the concrete processor classes."""

    def get_tests(self):
        """Return the tests of the engine.

        An engine's ``tests`` attribute is returned as is.  Without it, the
        engine's ``additional_tests`` (default: none) are combined with
        :py:obj:`get_default_tests`; default tests win on equal names.
        """
        tests = getattr(self.engine, 'tests', None)
        if tests is None:
            # Merge into a copy: updating the engine's own ``additional_tests``
            # dict in place would leak the default tests into the engine.
            tests = dict(getattr(self.engine, 'additional_tests', {}))
            tests.update(self.get_default_tests())
        return tests

    def get_default_tests(self):
        """Return the default tests of this processor type (none here)."""
        return {}
|
||||
26
searx/search/processors/offline.py
Normal file
26
searx/search/processors/offline.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Processors for engine-type: ``offline``
|
||||
|
||||
"""
|
||||
|
||||
from .abstract import EngineProcessor
|
||||
|
||||
|
||||
class OfflineProcessor(EngineProcessor):
    """Request processor for engines of type ``offline``."""

    engine_type = 'offline'

    def _search_basic(self, query, params):
        """Delegate to the engine's ``search`` function and return its result."""
        return self.engine.search(query, params)

    def search(self, query, params, result_container, start_time, timeout_limit):
        """Run the engine and store its results in ``result_container``.

        A ``ValueError`` is only logged; any other exception is also handed
        to ``handle_exception``.  Nothing is re-raised.
        """
        try:
            results = self._search_basic(query, params)
            self.extend_container(result_container, start_time, results)
        except ValueError as exc:
            # invalid input: log only, do not record the error
            message = 'engine {0} : invalid input : {1}'.format(self.engine_name, exc)
            self.logger.exception(message)
        except Exception as exc:  # pylint: disable=broad-except
            self.handle_exception(result_container, exc)
            message = 'engine {0} : exception : {1}'.format(self.engine_name, exc)
            self.logger.exception(message)
|
||||
233
searx/search/processors/online.py
Normal file
233
searx/search/processors/online.py
Normal file
@@ -0,0 +1,233 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Processors for engine-type: ``online``
|
||||
|
||||
"""
|
||||
# pylint: disable=use-dict-literal
|
||||
|
||||
from timeit import default_timer
|
||||
import asyncio
|
||||
import ssl
|
||||
import httpx
|
||||
|
||||
import searx.network
|
||||
from searx.utils import gen_useragent
|
||||
from searx.exceptions import (
|
||||
SearxEngineAccessDeniedException,
|
||||
SearxEngineCaptchaException,
|
||||
SearxEngineTooManyRequestsException,
|
||||
)
|
||||
from searx.metrics.error_recorder import count_error
|
||||
from .abstract import EngineProcessor
|
||||
|
||||
|
||||
def default_request_params():
    """Return a fresh dict with the default request parameters of ``online`` engines.

    Each call builds new (empty) ``headers``, ``data`` and ``cookies`` dicts,
    so callers may modify the returned value freely.
    """
    return dict(
        method='GET',
        headers={},
        data={},
        url='',
        cookies={},
        auth=None,
    )
|
||||
|
||||
|
||||
class OnlineProcessor(EngineProcessor):
    """Request processor for engines of type ``online``."""

    engine_type = 'online'

    def _prepare_network_context(self, timeout, start_time):
        """Set timeout, HTTP time counter and network for the current thread."""
        # set timeout for all HTTP requests
        searx.network.set_timeout_for_thread(timeout, start_time=start_time)
        # reset the HTTP total time
        searx.network.reset_time_for_thread()
        # set the network
        searx.network.set_context_network_name(self.engine_name)

    def initialize(self):
        """Prepare the network context of the thread, then initialize the engine."""
        self._prepare_network_context(self.engine.timeout, default_timer())
        super().initialize()

    def get_params(self, search_query, engine_category):
        """Returns a set of :ref:`request params <engine request online>` or ``None``
        if request is not supported.

        On top of the base parameters this adds the default request
        parameters, a generated ``User-Agent`` header and, when the engine
        asks for it and the query has a locale, an ``Accept-Language`` header.
        """
        params = super().get_params(search_query, engine_category)
        if params is None:
            return None

        # add default params
        params.update(default_request_params())
        headers = params['headers']

        # add an user agent
        headers['User-Agent'] = gen_useragent()

        # add Accept-Language header
        if self.engine.send_accept_language_header and search_query.locale:
            locale = search_query.locale
            if locale.territory:
                accept_language = "%s-%s,%s;q=0.9,*;q=0.5" % (
                    locale.language,
                    locale.territory,
                    locale.language,
                )
            else:
                accept_language = locale.language
            headers['Accept-Language'] = accept_language

        self.logger.debug('HTTP Accept-Language: %s', headers.get('Accept-Language', ''))
        return params

    def _send_http_request(self, params):
        """Send the HTTP request described by ``params`` and return the response.

        More redirects than the soft limit are recorded as a secondary error;
        the response is returned nevertheless.
        """
        # collect all keyword arguments of the request
        request_args = dict(headers=params['headers'], cookies=params['cookies'], auth=params['auth'])

        # verify
        # if not None, it overrides the verify value defined in the network.
        # use False to accept any server certificate
        # use a path to file to specify a server certificate
        verify = params.get('verify')
        if verify is not None:
            request_args['verify'] = verify

        # max_redirects
        max_redirects = params.get('max_redirects')
        if max_redirects:
            request_args['max_redirects'] = max_redirects

        # allow_redirects
        if 'allow_redirects' in params:
            request_args['allow_redirects'] = params['allow_redirects']

        # soft_max_redirects
        soft_max_redirects = params.get('soft_max_redirects', max_redirects or 0)

        # raise_for_status
        request_args['raise_for_httperror'] = params.get('raise_for_httperror', True)

        # specific type of request (GET or POST)
        send = searx.network.get if params['method'] == 'GET' else searx.network.post

        request_args['data'] = params['data']

        # send the request
        response = send(params['url'], **request_args)

        # check soft limit of the redirect count
        redirect_count = len(response.history)
        if redirect_count > soft_max_redirects:
            # unexpected redirect : record an error
            # but the engine might still return valid results.
            status_code = str(response.status_code or '')
            reason = response.reason_phrase or ''
            hostname = response.url.host
            count_error(
                self.engine_name,
                '{} redirects, maximum: {}'.format(redirect_count, soft_max_redirects),
                (status_code, reason, hostname),
                secondary=True,
            )

        return response

    def _search_basic(self, query, params):
        """Let the engine build the request, send it and parse the response.

        :return: the engine's parsed results, or ``None`` when the engine left
            ``params['url']`` empty.
        """
        # update request parameters dependent on
        # search-engine (contained in engines folder)
        self.engine.request(query, params)

        # ignoring empty urls
        if not params['url']:
            return None

        # send request
        response = self._send_http_request(params)

        # parse the response
        response.search_params = params
        return self.engine.response(response)

    def search(self, query, params, result_container, start_time, timeout_limit):
        """Run the search and store results or errors in ``result_container``.

        SSL, timeout, HTTP, CAPTCHA, too-many-requests and access-denied
        errors are reported with ``suspend=True``; any other exception is
        reported without suspending.  Nothing is re-raised.
        """
        self._prepare_network_context(timeout_limit, start_time)

        try:
            # send requests and parse the results
            results = self._search_basic(query, params)
            self.extend_container(result_container, start_time, results)
        except ssl.SSLError as e:
            # TLS / certificate problem
            self.handle_exception(result_container, e, suspend=True)
            verify = searx.network.get_network(self.engine_name).verify
            self.logger.error("SSLError {}, verify={}".format(e, verify))
        except (httpx.TimeoutException, asyncio.TimeoutError) as e:
            # requests timeout (connect or read)
            self.handle_exception(result_container, e, suspend=True)
            elapsed = default_timer() - start_time
            self.logger.error(
                "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}".format(
                    elapsed, timeout_limit, e.__class__.__name__
                )
            )
        except (httpx.HTTPError, httpx.StreamError) as e:
            # other requests exception
            self.handle_exception(result_container, e, suspend=True)
            elapsed = default_timer() - start_time
            self.logger.exception(
                "requests exception (search duration : {0} s, timeout: {1} s) : {2}".format(
                    elapsed, timeout_limit, e
                )
            )
        except SearxEngineCaptchaException as e:
            self.handle_exception(result_container, e, suspend=True)
            self.logger.exception('CAPTCHA')
        except SearxEngineTooManyRequestsException as e:
            self.handle_exception(result_container, e, suspend=True)
            self.logger.exception('Too many requests')
        except SearxEngineAccessDeniedException as e:
            self.handle_exception(result_container, e, suspend=True)
            self.logger.exception('SearXNG is blocked')
        except Exception as e:  # pylint: disable=broad-except
            self.handle_exception(result_container, e)
            self.logger.exception('exception : {0}'.format(e))

    def get_default_tests(self):
        """Return the default tests, depending on the features of the engine."""
        engine = self.engine
        tests = {
            'simple': {
                'matrix': {'query': ('life', 'computer')},
                'result_container': ['not_empty'],
            }
        }

        if getattr(engine, 'paging', False):
            # 'general' engines search for 'news' to avoid documentation about
            # HTML tags (<time> and <input type="time">)
            paging_query = 'news' if 'general' in engine.categories else 'time'
            tests['paging'] = {
                'matrix': {'query': paging_query, 'pageno': (1, 2, 3)},
                'result_container': ['not_empty'],
                'test': ['unique_results'],
            }

        # NOTE(review): the abstract processor's get_params checks
        # ``time_range_support`` while this reads ``time_range`` -- confirm
        # which attribute engines actually define.
        if getattr(engine, 'time_range', False):
            tests['time_range'] = {
                'matrix': {'query': 'news', 'time_range': (None, 'day')},
                'result_container': ['not_empty'],
                'test': ['unique_results'],
            }

        if getattr(engine, 'traits', False):
            tests['lang_fr'] = {
                'matrix': {'query': 'paris', 'lang': 'fr'},
                'result_container': ['not_empty', ('has_language', 'fr')],
            }
            tests['lang_en'] = {
                'matrix': {'query': 'paris', 'lang': 'en'},
                'result_container': ['not_empty', ('has_language', 'en')],
            }

        if getattr(engine, 'safesearch', False):
            tests['safesearch'] = {'matrix': {'query': 'porn', 'safesearch': (0, 2)}, 'test': ['unique_results']}

        return tests
|
||||
63
searx/search/processors/online_currency.py
Normal file
63
searx/search/processors/online_currency.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Processors for engine-type: ``online_currency``
|
||||
|
||||
"""
|
||||
|
||||
import unicodedata
|
||||
import re
|
||||
|
||||
from searx.data import CURRENCIES
|
||||
from .online import OnlineProcessor
|
||||
|
||||
# "<amount> <from currency> in|to <to currency>", e.g. "1337 usd in rmb"
parser_re = re.compile(r'.*?(\d+(?:\.\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', flags=re.IGNORECASE)
|
||||
|
||||
|
||||
def normalize_name(name: str):
    """Normalize a currency name for lookup.

    The name is stripped, lower-cased, hyphens become spaces, trailing ``s``
    characters are removed, runs of spaces collapse to a single space and the
    result is NFKD-normalized.
    """
    lowered = name.strip().lower()
    without_plural = lowered.replace('-', ' ').rstrip('s')
    single_spaced = re.sub(' +', ' ', without_plural)
    return unicodedata.normalize('NFKD', single_spaced).lower()
|
||||
|
||||
|
||||
class OnlineCurrencyProcessor(OnlineProcessor):
    """Request processor for engines of type ``online_currency``."""

    engine_type = 'online_currency'

    def get_params(self, search_query, engine_category):
        """Returns a set of :ref:`request params <engine request online_currency>`
        or ``None`` if search query does not match to :py:obj:`parser_re`.

        Adds ``amount``, ``from``, ``to``, ``from_name`` and ``to_name`` to
        the parameters of the ``online`` processor.
        """
        params = super().get_params(search_query, engine_category)
        if params is None:
            return None

        match = parser_re.match(search_query.query)
        if not match:
            return None

        amount_text, from_name, to_name = match.groups()
        try:
            amount = float(amount_text)
        except ValueError:
            return None

        # translate the currency names typed by the user into ISO 4217 codes
        from_iso = CURRENCIES.name_to_iso4217(normalize_name(from_name))
        to_iso = CURRENCIES.name_to_iso4217(normalize_name(to_name))

        params['amount'] = amount
        params['from'] = from_iso
        params['to'] = to_iso
        params['from_name'] = CURRENCIES.iso4217_to_name(from_iso, "en")
        params['to_name'] = CURRENCIES.iso4217_to_name(to_iso, "en")
        return params

    def get_default_tests(self):
        """Return the default test: a conversion that must yield an answer."""
        return {
            'currency': {
                'matrix': {'query': '1337 usd in rmb'},
                'result_container': ['has_answer'],
            }
        }
|
||||
60
searx/search/processors/online_dictionary.py
Normal file
60
searx/search/processors/online_dictionary.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Processors for engine-type: ``online_dictionary``
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
from searx.utils import is_valid_lang
|
||||
from .online import OnlineProcessor
|
||||
|
||||
# "<from lang>-<to lang> <text>", e.g. "en-es house"
parser_re = re.compile('.*?([a-z]+)-([a-z]+) (.+)$', flags=re.IGNORECASE)
|
||||
|
||||
|
||||
class OnlineDictionaryProcessor(OnlineProcessor):
    """Request processor for engines of type ``online_dictionary``."""

    engine_type = 'online_dictionary'

    def get_params(self, search_query, engine_category):
        """Returns a set of :ref:`request params <engine request online_dictionary>` or
        ``None`` if search query does not match to :py:obj:`parser_re`.

        ``None`` is also returned when one of the two languages is rejected by
        ``is_valid_lang``.  Otherwise ``from_lang`` and ``to_lang`` are added
        and ``query`` is replaced by the text after the language pair.
        """
        params = super().get_params(search_query, engine_category)
        if params is None:
            return None

        match = parser_re.match(search_query.query)
        if not match:
            return None

        source, target, text = match.groups()
        source_lang = is_valid_lang(source)
        target_lang = is_valid_lang(target)
        if not (source_lang and target_lang):
            return None

        params['from_lang'] = source_lang
        params['to_lang'] = target_lang
        params['query'] = text
        return params

    def get_default_tests(self):
        """Return the default translation test, paged when the engine supports paging."""
        result_checks = ['not_empty', ('one_title_contains', 'house')]

        if getattr(self.engine, 'paging', False):
            return {
                'translation_paging': {
                    'matrix': {'query': 'en-es house', 'pageno': (1, 2, 3)},
                    'result_container': result_checks,
                    'test': ['unique_results'],
                }
            }

        return {
            'translation': {
                'matrix': {'query': 'en-es house'},
                'result_container': result_checks,
            }
        }
|
||||
45
searx/search/processors/online_url_search.py
Normal file
45
searx/search/processors/online_url_search.py
Normal file
@@ -0,0 +1,45 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Processors for engine-type: ``online_url_search``
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
from .online import OnlineProcessor
|
||||
|
||||
# URL kinds recognized in a search query, each with the pattern that finds it
re_search_urls = {
    kind: re.compile(pattern)
    for kind, pattern in (
        ('http', r'https?:\/\/[^ ]*'),
        ('ftp', r'ftps?:\/\/[^ ]*'),
        ('data:image', 'data:image/[^; ]*;base64,[^ ]*'),
    )
}
|
||||
|
||||
|
||||
class OnlineUrlSearchProcessor(OnlineProcessor):
    """Request processor for engines of type ``online_url_search``."""

    engine_type = 'online_url_search'

    def get_params(self, search_query, engine_category):
        """Returns a set of :ref:`request params <engine request online>` or ``None`` if
        search query does not match to :py:obj:`re_search_urls`.

        ``params['search_urls']`` maps every key of :py:obj:`re_search_urls`
        to the first URL of that kind found in the query, or to ``None``.
        """
        params = super().get_params(search_query, engine_category)
        if params is None:
            return None

        query = search_query.query
        matches = {kind: pattern.search(query) for kind, pattern in re_search_urls.items()}

        # the query has to contain at least one URL
        if not any(matches.values()):
            return None

        params['search_urls'] = {kind: (match[0] if match else None) for kind, match in matches.items()}
        return params
|
||||
Reference in New Issue
Block a user