[htmlgrab] Added support for base64 images

This commit is contained in:
Pijus Kamandulis 2021-06-09 19:07:07 +03:00
parent 01f7879f7e
commit 38f9cdeb09
5 changed files with 95 additions and 14 deletions

13
crypto/shaStr.go Normal file
View File

@ -0,0 +1,13 @@
package crypto
import (
"crypto/sha1"
"encoding/hex"
"io"
)
// ShaStr returns the SHA-1 digest of input encoded as a lowercase
// hexadecimal string.
func ShaStr(input string) string {
	hasher := sha1.New()
	// Writes to a hash.Hash never return an error, so the result is ignored.
	_, _ = io.WriteString(hasher, input)
	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest)
}

View File

@ -7,18 +7,18 @@ import (
)
// DownloadFile - Download file and store it
func DownloadFile(outputFilename string, url string) {
func DownloadFile(outputFilename string, url string) error {
// Get the data
resp, err := http.Get(url)
if err != nil {
panic(err)
return err
}
defer resp.Body.Close()
// Create the file
out, err := os.Create(outputFilename)
if err != nil {
panic(err)
return err
}
defer out.Close()
@ -26,6 +26,8 @@ func DownloadFile(outputFilename string, url string) {
_, err = io.Copy(out, resp.Body)
if err != nil {
panic(err)
return err
}
return nil
}

48
fileio/saveFromBase64.go Normal file
View File

@ -0,0 +1,48 @@
package fileio
import (
"bytes"
"encoding/base64"
"fmt"
"image/jpeg"
"image/png"
"os"
"strings"
crypto "github.com/pikami/rss-dl/crypto"
)
// SaveFromBase64 decodes a base64 image data URL and stores the image in
// basePath.
//
// imgStr is expected to look like "data:image/png;base64,<payload>". Only the
// "image/png" and "image/jpeg" media types are handled. The payload is decoded
// and re-encoded (JPEG at quality 100) into a file named after the SHA-1 of
// the complete imgStr, so identical data URLs map to the same file name.
//
// It returns the file name relative to basePath (for example "<sha1>.png").
// It returns "#" when imgStr is not a supported data URL, or when the payload
// cannot be decoded or the file cannot be written, so callers never receive
// the name of a file that does not exist.
func SaveFromBase64(imgStr string, basePath string) string {
	// Number of leading bytes skipped before the media type ("data:").
	const schemeLen = len("data:")

	coI := strings.Index(imgStr, ",")
	// strings.Index yields -1 when there is no comma. Slicing
	// imgStr[schemeLen:coI] would then panic, as it would for a comma that
	// sits inside the scheme prefix, so reject both cases up front.
	if coI < schemeLen {
		return "#"
	}

	var ext string
	switch strings.TrimSuffix(imgStr[schemeLen:coI], ";base64") {
	case "image/png":
		ext = ".png"
	case "image/jpeg":
		ext = ".jpg"
	default:
		return "#"
	}

	unbased, err := base64.StdEncoding.DecodeString(imgStr[coI+1:])
	if err != nil {
		fmt.Println("[save base64] Failed to decode payload: " + err.Error())
		return "#"
	}
	res := bytes.NewReader(unbased)

	// Decode before touching the file system so that an invalid image never
	// leaves an empty file behind.
	var encode func(f *os.File) error
	if ext == ".png" {
		pngI, err := png.Decode(res)
		if err != nil {
			fmt.Println("[save base64] Failed to decode image: " + err.Error())
			return "#"
		}
		encode = func(f *os.File) error { return png.Encode(f, pngI) }
	} else {
		jpgI, err := jpeg.Decode(res)
		if err != nil {
			fmt.Println("[save base64] Failed to decode image: " + err.Error())
			return "#"
		}
		encode = func(f *os.File) error {
			return jpeg.Encode(f, jpgI, &jpeg.Options{Quality: 100})
		}
	}

	fileName := crypto.ShaStr(imgStr) + ext
	fileSavePath := basePath + "/" + fileName
	if err := writeEncodedImage(fileSavePath, encode); err != nil {
		fmt.Println("[save base64] Failed to write image: " + err.Error())
		return "#"
	}
	fmt.Println("[save base64] Created image: " + fileSavePath)
	return fileName
}

// writeEncodedImage creates (or truncates) the file at path, lets encode write
// the image into it, and closes it. A partially written file is removed when
// encoding fails.
func writeEncodedImage(path string, encode func(f *os.File) error) error {
	// O_TRUNC prevents stale trailing bytes when an older, larger file with
	// the same name already exists.
	f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0777)
	if err != nil {
		return err
	}
	if err := encode(f); err != nil {
		f.Close()
		os.Remove(path)
		return err
	}
	return f.Close()
}

View File

@ -30,14 +30,23 @@ func HtmlGrab(htmlStr string, itemOutputDir string) {
// For each item found, get the title
val, exists := s.Attr("src")
if exists {
imageName := helpers.RemoveGetParams(filepath.Base(val))
itemImagePath := outputDir + "/" + imageName
helpers.LogInfo("Downloading image to " + itemImagePath)
fileio.DownloadFile(
itemImagePath,
val)
imageName := "#"
if strings.Contains(val, "base64") {
imageName = fileio.SaveFromBase64(val, outputDir)
} else {
imageName = helpers.RemoveGetParams(filepath.Base(val))
itemImagePath := outputDir + "/" + imageName
helpers.LogInfo("Downloading image to " + itemImagePath)
err = fileio.DownloadFile(
itemImagePath,
val)
fmt.Printf("[htmlgrab] %d: %s\n", i, val)
if err != nil {
fmt.Printf("[htmlgrab] %d: failed to get %s\n", i, val)
} else {
fmt.Printf("[htmlgrab] %d: %s\n", i, val)
}
}
s.SetAttr("src", imageName)
}

15
main.go
View File

@ -29,7 +29,10 @@ func main() {
if feed.Image != nil {
feedImagePath := outputDir + "/image" + helpers.RemoveGetParams(filepath.Ext(feed.Image.URL))
fileio.DownloadFile(feedImagePath, feed.Image.URL)
err := fileio.DownloadFile(feedImagePath, feed.Image.URL)
if err != nil {
panic(err)
}
}
for _, item := range feed.Items {
@ -54,17 +57,23 @@ func main() {
if item.Image != nil {
itemImagePath := itemOutputDir + "/image" + helpers.RemoveGetParams(filepath.Ext(item.Image.URL))
helpers.LogInfo("Downloading image to " + itemImagePath)
fileio.DownloadFile(
err := fileio.DownloadFile(
itemImagePath,
item.Image.URL)
if err != nil {
panic(err)
}
}
for _, enclosure := range item.Enclosures {
filename := helpers.RemoveGetParams(filepath.Base(enclosure.URL))
helpers.LogInfo("Downloading attachment '" + filename + "'")
fileio.DownloadFile(
err := fileio.DownloadFile(
itemOutputDir+"/"+filename,
enclosure.URL)
if err != nil {
panic(err)
}
}
if structs.Config.ParseHtml {