From 8fd3bdf68760f881576bb894e1a5c7eba452a24b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=92scar=20Casajuana?=
Date: Fri, 5 Jan 2024 18:28:34 +0100
Subject: [PATCH] Add support for asurascans

---
 grabber/plainhtml.go | 49 +++++++++++++++++++++++++++++++++++---------
 grabber/site.go      |  2 +-
 2 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/grabber/plainhtml.go b/grabber/plainhtml.go
index 792d30c..5f21086 100644
--- a/grabber/plainhtml.go
+++ b/grabber/plainhtml.go
@@ -15,6 +15,16 @@ type PlainHTML struct {
 	*Grabber
 	doc  *goquery.Document
 	rows *goquery.Selection
+	site SiteSelector
+}
+
+type SiteSelector struct {
+	Title        string
+	Rows         string
+	Link         string
+	Chapter      string
+	ChapterTitle string
+	Image        string
 }
 
 // PlainHTMLChapter represents a PlainHTML Chapter
@@ -37,17 +47,33 @@ func (m *PlainHTML) Test() (bool, error) {
 	}
 
 	// order is important, since some sites have very similar selectors
-	selectors := []string{
+	selectors := []SiteSelector{
 		// tcbscans.com
-		"main .mx-auto .grid .col-span-2 a",
+		{
+			Title:        "h1",
+			Rows:         "main .mx-auto .grid .col-span-2 a",
+			Chapter:      ".font-bold",
+			ChapterTitle: ".text-gray-500",
+			Image:        "picture img",
+		},
+		// asuratoon.com
+		{
+			Title:        "h1",
+			Rows:         "#chapterlist ul li",
+			Chapter:      ".chapternum",
+			ChapterTitle: ".chapternum",
+			Link:         "a",
+			Image:        "#readerarea img.ts-main-image",
+		},
 	}
 
 	// for the same priority reasons, we need to iterate over the selectors
 	// using a simple `,` joining all selectors would return missmatches
 	for _, selector := range selectors {
-		rows := m.doc.Find(selector)
+		rows := m.doc.Find(selector.Rows)
 		if rows.Length() > 0 {
 			m.rows = rows
+			m.site = selector
 			break
 		}
 	}
@@ -71,7 +97,7 @@ func (m PlainHTML) FetchChapters() (chapters Filterables, errs []error) {
 	m.rows.Each(func(i int, s *goquery.Selection) {
 		// we need to get the chapter number from the title
 		re := regexp.MustCompile(`Chapter\s*(\d+\.?\d*)`)
-		chap := re.FindStringSubmatch(s.Find(".font-bold").Text())
+		chap := re.FindStringSubmatch(s.Find(m.site.Chapter).Text())
 		// if the chapter has no number, we skip it (these are usually site announcements)
 		if len(chap) == 0 {
 			return
@@ -84,13 +110,16 @@ func (m PlainHTML) FetchChapters() (chapters Filterables, errs []error) {
 			return
 		}
 		u := s.AttrOr("href", "")
+		if m.site.Link != "" {
+			u = s.Find(m.site.Link).AttrOr("href", "")
+		}
 		if !strings.HasPrefix(u, "http") {
 			u = m.BaseUrl() + u
 		}
-		chapter := &ManganeloChapter{
+		chapter := &PlainHTMLChapter{
 			Chapter{
 				Number: number,
-				Title:  s.Find(".text-gray-500").Text(),
+				Title:  s.Find(m.site.ChapterTitle).Text(),
 			},
 			u,
 		}
@@ -103,7 +132,7 @@ func (m PlainHTML) FetchChapters() (chapters Filterables, errs []error) {
 
 // FetchChapter fetches a chapter and its pages
 func (m PlainHTML) FetchChapter(f Filterable) (*Chapter, error) {
-	mchap := f.(*ManganeloChapter)
+	mchap := f.(*PlainHTMLChapter)
 	body, err := http.Get(http.RequestParams{
 		URL: mchap.URL,
 	})
@@ -116,7 +145,7 @@ func (m PlainHTML) FetchChapter(f Filterable) (*Chapter, error) {
 		return nil, err
 	}
 
-	pimages := getPlainHTMLImageURL(doc)
+	pimages := getPlainHTMLImageURL(m.site.Image, doc)
 
 	chapter := &Chapter{
 		Title: f.GetTitle(),
@@ -144,9 +173,9 @@ func (m PlainHTML) FetchChapter(f Filterable) (*Chapter, error) {
 	return chapter, nil
 }
 
-func getPlainHTMLImageURL(doc *goquery.Document) []string {
+func getPlainHTMLImageURL(selector string, doc *goquery.Document) []string {
 	// images are inside picture objects
-	pimages := doc.Find("picture img")
+	pimages := doc.Find(selector)
 	imgs := []string{}
 	pimages.Each(func(i int, s *goquery.Selection) {
 		src := s.AttrOr("src", "")
diff --git a/grabber/site.go b/grabber/site.go
index e6dff47..2d700e7 100644
--- a/grabber/site.go
+++ b/grabber/site.go
@@ -66,11 +66,11 @@ type Site interface {
 // IdentifySite returns the site passing the Test() for the specified url
 func (g *Grabber) IdentifySite() (Site, []error) {
 	sites := []Site{
+		&PlainHTML{Grabber: g},
 		&Inmanga{Grabber: g},
 		&Mangadex{Grabber: g},
 		&Tcb{Grabber: g},
 		&Manganelo{Grabber: g},
-		&PlainHTML{Grabber: g},
 	}
 
 	var errs []error
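
The change above swaps the hard-coded tcbscans selectors for a small table of SiteSelector entries: Test() keeps the first entry whose Rows selector matches the fetched page, and FetchChapters/FetchChapter then reuse that entry's Chapter, ChapterTitle, Link and Image selectors. The standalone Go sketch below illustrates the same first-match-wins idea with goquery; the HTML snippet is a made-up stand-in for a chapter list, and the local siteSelector type (with an extra Name field added just for labeling output) is only a simplified mirror of the struct introduced by the patch, not code from the repository.

package main

import (
	"fmt"
	"strings"

	"github.com/PuerkitoBio/goquery"
)

// siteSelector loosely mirrors the SiteSelector struct from the patch;
// Name is an extra field used here only to label the output.
type siteSelector struct {
	Name    string
	Rows    string
	Chapter string
}

func main() {
	// Hypothetical markup standing in for a fetched chapter-list page
	// (asuratoon.com-style #chapterlist layout).
	html := `<div id="chapterlist"><ul>
		<li><a href="/chapter-12"><span class="chapternum">Chapter 12</span></a></li>
		<li><a href="/chapter-13"><span class="chapternum">Chapter 13</span></a></li>
	</ul></div>`

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		panic(err)
	}

	// Order matters: more specific selectors are tried first, as in Test().
	selectors := []siteSelector{
		{Name: "tcbscans.com", Rows: "main .mx-auto .grid .col-span-2 a", Chapter: ".font-bold"},
		{Name: "asuratoon.com", Rows: "#chapterlist ul li", Chapter: ".chapternum"},
	}

	for _, sel := range selectors {
		rows := doc.Find(sel.Rows)
		if rows.Length() == 0 {
			continue
		}
		// The first selector set that matches identifies the site; keep it and stop.
		fmt.Println("matched:", sel.Name)
		rows.Each(func(_ int, s *goquery.Selection) {
			fmt.Println("chapter:", s.Find(sel.Chapter).Text())
		})
		break
	}
}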