first commit

2025-06-26 15:38:10 +03:30
commit e928faf6d2
899 changed files with 403713 additions and 0 deletions
--- a/searx/engines/wikicommons.py
+++ b/searx/engines/wikicommons.py
@@ -0,0 +1,104 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Wikimedia Commons (images)
+
+"""
+
+import datetime
+
+from urllib.parse import urlencode
+
+from searx.utils import html_to_text, humanize_bytes
+
+# about
+about = {
+    "website": 'https://commons.wikimedia.org/',
+    "wikidata_id": 'Q565',
+    "official_api_documentation": 'https://commons.wikimedia.org/w/api.php',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+categories = ['images']
+search_type = 'images'
+
+base_url = "https://commons.wikimedia.org"
+search_prefix = (
+    '?action=query'
+    '&format=json'
+    '&generator=search'
+    '&gsrnamespace=6'
+    '&gsrprop=snippet'
+    '&prop=info|imageinfo'
+    '&iiprop=url|size|mime'
+    '&iiurlheight=180'  # needed for the thumb url
+)
+paging = True
+number_of_results = 10
+
+search_types = {
+    'images': 'bitmap|drawing',
+    'videos': 'video',
+    'audio': 'audio',
+    'files': 'multimedia|office|archive|3d',
+}
+
+
+def request(query, params):
+    language = 'en'
+    if params['language'] != 'all':
+        language = params['language'].split('-')[0]
+
+    if search_type not in search_types:
+        raise ValueError(f"Unsupported search type: {search_type}")
+
+    filetype = search_types[search_type]
+
+    args = {
+        'uselang': language,
+        'gsrlimit': number_of_results,
+        'gsroffset': number_of_results * (params["pageno"] - 1),
+        'gsrsearch': f"filetype:{filetype} {query}",
+    }
+
+    params["url"] = f"{base_url}/w/api.php{search_prefix}&{urlencode(args, safe=':|')}"
+    return params
+
+
+def response(resp):
+    results = []
+    json = resp.json()
+
+    if not json.get("query", {}).get("pages"):
+        return results
+    for item in json["query"]["pages"].values():
+        imageinfo = item["imageinfo"][0]
+        title = item["title"].replace("File:", "").rsplit('.', 1)[0]
+        result = {
+            'url': imageinfo["descriptionurl"],
+            'title': title,
+            'content': html_to_text(item["snippet"]),
+        }
+
+        if search_type == "images":
+            result['template'] = 'images.html'
+            result['img_src'] = imageinfo["url"]
+            result['thumbnail_src'] = imageinfo["thumburl"]
+            result['resolution'] = f'{imageinfo["width"]} x {imageinfo["height"]}'
+        else:
+            result['thumbnail'] = imageinfo["thumburl"]
+
+        if search_type == "videos":
+            result['template'] = 'videos.html'
+            if imageinfo.get('duration'):
+                result['length'] = datetime.timedelta(seconds=int(imageinfo['duration']))
+            result['iframe_src'] = imageinfo['url']
+        elif search_type == "files":
+            result['template'] = 'files.html'
+            result['metadata'] = imageinfo['mime']
+            result['size'] = humanize_bytes(imageinfo['size'])
+        elif search_type == "audio":
+            result['iframe_src'] = imageinfo['url']
+
+        results.append(result)
+
+    return results