first commit
This commit is contained in:
104
searx/engines/wikicommons.py
Normal file
104
searx/engines/wikicommons.py
Normal file
@@ -0,0 +1,104 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Wikimedia Commons (images)
|
||||
|
||||
"""
|
||||
|
||||
import datetime
|
||||
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from searx.utils import html_to_text, humanize_bytes
|
||||
|
||||
# about
# Descriptive metadata for this engine: the upstream website, its Wikidata
# item, where the API is documented, and the format of the results.
about = {
    "website": 'https://commons.wikimedia.org/',
    "wikidata_id": 'Q565',
    "official_api_documentation": 'https://commons.wikimedia.org/w/api.php',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}
|
||||
categories = ['images']
# Key into ``search_types``; request() uses it to pick the ``filetype:`` filter
# and response() uses it to decide which result fields/template to emit.
search_type = 'images'

base_url = "https://commons.wikimedia.org"
# Fixed part of the API query string; request() appends the per-query
# arguments (language, limit, offset, search terms) after it.
search_prefix = (
    '?action=query'
    '&format=json'
    '&generator=search'
    '&gsrnamespace=6'  # namespace 6 is MediaWiki's "File:" namespace
    '&gsrprop=snippet'
    '&prop=info|imageinfo'
    '&iiprop=url|size|mime'
    '&iiurlheight=180'  # needed for the thumb url
)
# request() turns ``params["pageno"]`` into a ``gsroffset``.
paging = True
# Sent as ``gsrlimit`` and used as the page size for the offset computation.
number_of_results = 10

# Maps each supported ``search_type`` to the value placed after ``filetype:``
# in the search expression built by request().
search_types = {
    'images': 'bitmap|drawing',
    'videos': 'video',
    'audio': 'audio',
    'files': 'multimedia|office|archive|3d',
}
|
||||
|
||||
|
||||
def request(query, params):
    """Fill ``params["url"]`` with the Commons API search URL for *query*.

    The UI language is taken from ``params['language']`` (its part before the
    first ``-``), falling back to ``'en'`` when the value is ``'all'``.  The
    result offset is derived from ``params["pageno"]`` and the module-level
    ``number_of_results``.

    Raises:
        ValueError: if the module-level ``search_type`` has no entry in
            ``search_types``.

    Returns the same ``params`` mapping, updated in place.
    """
    language = 'en'
    if params['language'] != 'all':
        # keep only the primary language subtag, e.g. "en-US" -> "en"
        language = params['language'].split('-')[0]

    if search_type not in search_types:
        raise ValueError(f"Unsupported search type: {search_type}")

    filetype = search_types[search_type]

    args = {
        'uselang': language,
        'gsrlimit': number_of_results,
        'gsroffset': number_of_results * (params["pageno"] - 1),
        'gsrsearch': f"filetype:{filetype} {query}",
    }

    # ':' and '|' are left unescaped so the "filetype:a|b" filter stays
    # literal in the query string.
    params["url"] = f"{base_url}/w/api.php{search_prefix}&{urlencode(args, safe=':|')}"
    return params
|
||||
|
||||
|
||||
def response(resp):
    """Convert a Commons API reply into a list of result dicts.

    ``resp.json()`` is read for ``query.pages``; when that is missing or empty
    an empty list is returned.  Every page that carries ``imageinfo`` becomes
    one result with ``url``, ``title`` and ``content``; the remaining fields
    depend on the module-level ``search_type`` (images, videos, files, audio).
    Pages without ``imageinfo`` are skipped.
    """
    results = []
    payload = resp.json()

    pages = payload.get("query", {}).get("pages")
    if not pages:
        return results

    # The pages arrive as a mapping rather than a ranked list.  Generator
    # results carry their search rank in ``index``; sorting on it restores the
    # relevance order.  The sort is stable, so pages without ``index`` keep
    # their original relative order.
    for item in sorted(pages.values(), key=lambda page: page.get("index", 0)):
        infos = item.get("imageinfo")
        if not infos:
            # No file metadata for this page: nothing to link or display.
            continue
        imageinfo = infos[0]

        # Drop only the leading namespace prefix, then the file extension.
        title = item["title"]
        if title.startswith("File:"):
            title = title[len("File:"):]
        title = title.rsplit('.', 1)[0]

        result = {
            'url': imageinfo["descriptionurl"],
            'title': title,
            'content': html_to_text(item["snippet"]),
        }

        if search_type == "images":
            result['template'] = 'images.html'
            result['img_src'] = imageinfo["url"]
            result['thumbnail_src'] = imageinfo["thumburl"]
            result['resolution'] = f'{imageinfo["width"]} x {imageinfo["height"]}'
        else:
            result['thumbnail'] = imageinfo["thumburl"]

            if search_type == "videos":
                result['template'] = 'videos.html'
                if imageinfo.get('duration'):
                    result['length'] = datetime.timedelta(seconds=int(imageinfo['duration']))
                result['iframe_src'] = imageinfo['url']
            elif search_type == "files":
                result['template'] = 'files.html'
                result['metadata'] = imageinfo['mime']
                result['size'] = humanize_bytes(imageinfo['size'])
            elif search_type == "audio":
                result['iframe_src'] = imageinfo['url']

        results.append(result)

    return results
|
||||
Reference in New Issue
Block a user