Crawler

CrawlerSpec

class datahtml.base.CrawlerSpec
proxy: ProxyConf | None
abstract get(url, headers: Dict[str, Any] | None = None, timeout_secs: int = 60) → CrawlResponse
abstract async aget(url, headers: Dict[str, Any] | None = None, timeout_secs: int = 60) → CrawlResponse
class datahtml.base.CrawlResponse(content: bytes, url: str, headers: Dict[str, str], status_code: int)
__init__(content: bytes, url: str, headers: Dict[str, str], status_code: int)
property text
json()
property is_json
property is_xml
property is_txt

LocalCrawler

class datahtml.crawler.LocalCrawler(proxy: ProxyConf | None = None)
__init__(proxy: ProxyConf | None = None)
proxy: ProxyConf | None
get(url, headers: Dict[str, Any] | None = None, timeout_secs: int = 60) → CrawlResponse
async aget(url, headers: Dict[str, Any] | None = None, timeout_secs: int = 60) → CrawlResponse

AxiosCrawler