Coverage for src/crawler/by_source/ams/ams_jams_crawler.py: 28%
19 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-07-07 11:48 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-07-07 11:48 +0000
1from urllib.parse import urljoin
3from bs4 import BeautifulSoup
4from ptf.model_data import create_articledata
6from crawler.by_source.ams.ams_base_crawler import AmsCrawler
9class Ams_jamsCrawler(AmsCrawler):
10 source_domain = "AMS_JAMS"
12 def parse_issue_content(self, content, xissue):
13 if not xissue.url:
14 raise ValueError("xissue url is not set")
15 soup = BeautifulSoup(content, "html.parser")
16 articles = soup.select("article.contentList > dl > dt > a[href]")
17 for index, a in enumerate(articles):
18 article_url = a.get("href")
19 if not isinstance(article_url, str):
20 raise ValueError("Couldn't parse article url")
21 xarticle = create_articledata()
22 xarticle.url = urljoin(xissue.url, article_url)
23 xarticle.pid = "a" + str(index)
24 xissue.articles.append(xarticle)