import asyncio, sys, traceback, re
import random
import time
import os
from lxml import etree
from urllib.parse import urlparse, urlencode
import hashlib
from DrissionPage import ChromiumOptions, Chromium, errors
from sdk import CafeSDK
def _push_result(item):
    """Push one result row via CafeSDK, logging (not raising) on failure."""
    try:
        CafeSDK.Result.push_data(item)
    except Exception as e:
        CafeSDK.Log.error(f"Failed to push result data: {e}")


async def run():
    """Load one URL in the remote fingerprint browser and push its HTML.

    Reads ``url`` from the CafeSDK input parameters and the browser API key
    from the ``PROXY_AUTH`` environment variable, connects to the remote
    Chromium endpoint, loads the page, and pushes a single result row with
    the keys ``url``, ``html`` and ``resp_status``.

    ``resp_status`` is "200" when the HTML was retrieved, "403" when the
    browser connection failed, and "500" when loading the page failed. A row
    is pushed in all three cases so that failures are visible in the results.

    Returns:
        None. The function returns early (after logging an error) when the
        ``url`` input or the ``PROXY_AUTH`` variable is missing.

    Note:
        The body contains no ``await``; the browser calls are made
        synchronously inside this coroutine.
    """
    CafeSDK.Log.info("🚀 Init...")
    CafeSDK.Log.info("====================================================")
    CafeSDK.Log.info("🚀 CafeScraper TikTok Shop Information Scraper")
    CafeSDK.Log.info("====================================================")

    headers = [
        {"label": "url", "key": "url", "format": "text"},
        {"label": "html", "key": "html", "format": "text"},
        {"label": "resp_status", "key": "resp_status", "format": "text"},
    ]
    CafeSDK.Result.set_table_header(headers)

    input_json_dict = CafeSDK.Parameter.get_input_json_dict()
    CafeSDK.Log.debug(f"======input_json_dict====== {input_json_dict}")
    try:
        url = input_json_dict["url"]
    except (KeyError, TypeError):
        # Without a target URL there is nothing to fetch or report on.
        CafeSDK.Log.error("Missing required input parameter: url")
        return

    # os.environ.get never raises; a missing variable yields None, which
    # would otherwise be formatted into the endpoint as "apiKey=None".
    auth = os.environ.get("PROXY_AUTH")
    if not auth:
        CafeSDK.Log.error(
            "Failed to obtain browser authentication info: PROXY_AUTH is not set"
        )
        return
    # The key is a credential, so only its presence is logged, never its value.
    CafeSDK.Log.info("Browser authentication info loaded from PROXY_AUTH")

    browser_url = f"ws://chrome-ws-inner.cafescraper.com/ws?apiKey={auth}"
    rest_item = {"url": url, "html": "", "resp_status": "200"}

    CafeSDK.Log.info("Connecting to fingerprint browser...")
    co = ChromiumOptions()
    co.set_address(browser_url)
    try:
        browser = Chromium(co)
        CafeSDK.Log.info("Fingerprint browser connected successfully")
    except Exception:
        # The exception text is deliberately not logged: it may echo the
        # endpoint address, which embeds the API key.
        CafeSDK.Log.error("Failed to connect to fingerprint browser")
        rest_item["resp_status"] = "403"
        _push_result(rest_item)
        return

    page = None
    try:
        page = browser.new_tab()
        page.get(url)
        page.wait.doc_loaded()
        rest_item["html"] = page.html
    except Exception as e:
        CafeSDK.Log.error(f"[Error] Failed to retrieve page HTML: {e}")
        rest_item["resp_status"] = "500"
    finally:
        # Release the tab so repeated runs do not accumulate open tabs in
        # the remote browser; a failure to close must not mask the result.
        if page is not None:
            try:
                page.close()
            except Exception as e:
                CafeSDK.Log.debug(f"Failed to close browser tab: {e}")

    # Push exactly one row, whether the fetch succeeded or failed.
    _push_result(rest_item)
if __name__ == "__main__":
    # Script entry point: build the scraper coroutine and drive it to
    # completion on a fresh event loop.
    scraper_job = run()
    asyncio.run(scraper_job)