[htmlgrab] Added support for base64 images

This commit is contained in:
Pijus Kamandulis 2021-06-09 19:07:07 +03:00
parent 01f7879f7e
commit 38f9cdeb09
5 changed files with 95 additions and 14 deletions

13
crypto/shaStr.go Normal file
View File

@ -0,0 +1,13 @@
package crypto
import (
"crypto/sha1"
"encoding/hex"
"io"
)
// ShaStr returns the SHA-1 digest of input as a lowercase hexadecimal string.
func ShaStr(input string) string {
	hasher := sha1.New()
	// Writing to a hash.Hash never returns an error, so the result is ignored.
	_, _ = io.WriteString(hasher, input)

	digest := hasher.Sum(nil)
	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}

View File

@ -7,18 +7,18 @@ import (
) )
// DownloadFile - Download file and store it // DownloadFile - Download file and store it
func DownloadFile(outputFilename string, url string) { func DownloadFile(outputFilename string, url string) error {
// Get the data // Get the data
resp, err := http.Get(url) resp, err := http.Get(url)
if err != nil { if err != nil {
panic(err) return err
} }
defer resp.Body.Close() defer resp.Body.Close()
// Create the file // Create the file
out, err := os.Create(outputFilename) out, err := os.Create(outputFilename)
if err != nil { if err != nil {
panic(err) return err
} }
defer out.Close() defer out.Close()
@ -26,6 +26,8 @@ func DownloadFile(outputFilename string, url string) {
_, err = io.Copy(out, resp.Body) _, err = io.Copy(out, resp.Body)
if err != nil { if err != nil {
panic(err) return err
} }
return nil
} }

48
fileio/saveFromBase64.go Normal file
View File

@ -0,0 +1,48 @@
package fileio
import (
"bytes"
"encoding/base64"
"fmt"
"image/jpeg"
"image/png"
"os"
"strings"
crypto "github.com/pikami/rss-dl/crypto"
)
// SaveFromBase64 decodes a base64 image data URL and stores the image in basePath.
//
// imgStr is expected to have the form "data:<media type>;base64,<payload>".
// Only the media types image/png and image/jpeg are handled. The image is
// decoded and re-encoded (JPEG at quality 100) into a file named after
// crypto.ShaStr(imgStr) with a ".png" or ".jpg" extension.
//
// It returns the bare file name (without basePath) when the file was written.
// It returns "#" when imgStr is not a supported data URL, or when the payload
// cannot be decoded or the file cannot be written, so callers never receive
// the name of a file that does not exist.
func SaveFromBase64(imgStr string, basePath string) string {
	const scheme = "data:"

	// Validate the shape before slicing: without a comma after the scheme the
	// media type cannot be extracted, and slicing blindly would panic on
	// ordinary URLs that merely contain the word "base64".
	comma := strings.Index(imgStr, ",")
	if comma < len(scheme) || !strings.EqualFold(imgStr[:len(scheme)], scheme) {
		return "#"
	}

	mediaType := strings.TrimSuffix(imgStr[len(scheme):comma], ";base64")
	if mediaType != "image/png" && mediaType != "image/jpeg" {
		return "#"
	}

	payload, err := base64.StdEncoding.DecodeString(imgStr[comma+1:])
	if err != nil {
		fmt.Println("[save base64] Failed to decode base64 data: " + err.Error())
		return "#"
	}
	src := bytes.NewReader(payload)
	digest := crypto.ShaStr(imgStr)

	var (
		fileName string
		encode   func(f *os.File) error
	)
	switch mediaType {
	case "image/png":
		img, err := png.Decode(src)
		if err != nil {
			fmt.Println("[save base64] Failed to decode png image: " + err.Error())
			return "#"
		}
		fileName = digest + ".png"
		encode = func(f *os.File) error { return png.Encode(f, img) }
	case "image/jpeg":
		img, err := jpeg.Decode(src)
		if err != nil {
			fmt.Println("[save base64] Failed to decode jpeg image: " + err.Error())
			return "#"
		}
		fileName = digest + ".jpg"
		encode = func(f *os.File) error {
			return jpeg.Encode(f, img, &jpeg.Options{Quality: 100})
		}
	}

	fileSavePath := basePath + "/" + fileName
	if err := writeBase64Image(fileSavePath, encode); err != nil {
		fmt.Println("[save base64] Failed to write image " + fileSavePath + ": " + err.Error())
		return "#"
	}
	fmt.Println("[save base64] Created image: " + fileSavePath)
	return fileName
}

// writeBase64Image creates or truncates the file at path, runs encode against
// it, and returns the first error from opening, encoding or closing the file.
func writeBase64Image(path string, encode func(f *os.File) error) error {
	// O_TRUNC prevents stale trailing bytes when the file already exists.
	f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0777)
	if err != nil {
		return err
	}
	if err := encode(f); err != nil {
		f.Close()
		return err
	}
	// Close can report a deferred write failure, so its error is returned.
	return f.Close()
}

View File

@ -30,14 +30,23 @@ func HtmlGrab(htmlStr string, itemOutputDir string) {
// For each item found, get the title // For each item found, get the title
val, exists := s.Attr("src") val, exists := s.Attr("src")
if exists { if exists {
imageName := helpers.RemoveGetParams(filepath.Base(val)) imageName := "#"
itemImagePath := outputDir + "/" + imageName if strings.Contains(val, "base64") {
helpers.LogInfo("Downloading image to " + itemImagePath) imageName = fileio.SaveFromBase64(val, outputDir)
fileio.DownloadFile( } else {
itemImagePath, imageName = helpers.RemoveGetParams(filepath.Base(val))
val) itemImagePath := outputDir + "/" + imageName
helpers.LogInfo("Downloading image to " + itemImagePath)
err = fileio.DownloadFile(
itemImagePath,
val)
fmt.Printf("[htmlgrab] %d: %s\n", i, val) if err != nil {
fmt.Printf("[htmlgrab] %d: failed to get %s\n", i, val)
} else {
fmt.Printf("[htmlgrab] %d: %s\n", i, val)
}
}
s.SetAttr("src", imageName) s.SetAttr("src", imageName)
} }

15
main.go
View File

@ -29,7 +29,10 @@ func main() {
if feed.Image != nil { if feed.Image != nil {
feedImagePath := outputDir + "/image" + helpers.RemoveGetParams(filepath.Ext(feed.Image.URL)) feedImagePath := outputDir + "/image" + helpers.RemoveGetParams(filepath.Ext(feed.Image.URL))
fileio.DownloadFile(feedImagePath, feed.Image.URL) err := fileio.DownloadFile(feedImagePath, feed.Image.URL)
if err != nil {
panic(err)
}
} }
for _, item := range feed.Items { for _, item := range feed.Items {
@ -54,17 +57,23 @@ func main() {
if item.Image != nil { if item.Image != nil {
itemImagePath := itemOutputDir + "/image" + helpers.RemoveGetParams(filepath.Ext(item.Image.URL)) itemImagePath := itemOutputDir + "/image" + helpers.RemoveGetParams(filepath.Ext(item.Image.URL))
helpers.LogInfo("Downloading image to " + itemImagePath) helpers.LogInfo("Downloading image to " + itemImagePath)
fileio.DownloadFile( err := fileio.DownloadFile(
itemImagePath, itemImagePath,
item.Image.URL) item.Image.URL)
if err != nil {
panic(err)
}
} }
for _, enclosure := range item.Enclosures { for _, enclosure := range item.Enclosures {
filename := helpers.RemoveGetParams(filepath.Base(enclosure.URL)) filename := helpers.RemoveGetParams(filepath.Base(enclosure.URL))
helpers.LogInfo("Downloading attachment '" + filename + "'") helpers.LogInfo("Downloading attachment '" + filename + "'")
fileio.DownloadFile( err := fileio.DownloadFile(
itemOutputDir+"/"+filename, itemOutputDir+"/"+filename,
enclosure.URL) enclosure.URL)
if err != nil {
panic(err)
}
} }
if structs.Config.ParseHtml { if structs.Config.ParseHtml {