Improved status output

Added `-quiet` flag

Move out error messages to separate file
This commit is contained in:
Pijus Kamandulis 2020-02-08 01:51:17 +02:00
parent 673bbe1340
commit 1b3f985f42
21 changed files with 182 additions and 82 deletions

1
.gitignore vendored
View File

@ -5,3 +5,4 @@ downloads
*.exe *.exe
tiktok-dl tiktok-dl
batch_file.txt batch_file.txt
debug.log

View File

@ -21,6 +21,7 @@ Clone this repository and run `go build` to build the executable.
* `-metadata` - Write video metadata to a .json file * `-metadata` - Write video metadata to a .json file
* `-batch-file` - File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored. * `-batch-file` - File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.
* `-deadline` - Sets the timeout for scraper logic in seconds (used as a workaround for context deadline exceeded error) (default 1500) * `-deadline` - Sets the timeout for scraper logic in seconds (used as a workaround for context deadline exceeded error) (default 1500)
* `-quiet` - Suppress output
## Acknowledgments ## Acknowledgments
This software uses the **chromedp** for web scraping, it can be found here: https://github.com/chromedp/chromedp \ This software uses the **chromedp** for web scraping, it can be found here: https://github.com/chromedp/chromedp \

View File

@ -8,22 +8,20 @@ import (
"os" "os"
"time" "time"
models "../models" config "../models/config"
utils "../utils" utils "../utils"
) )
// GetMusicUploads - Get all uploads by given music // GetMusicUploads - Get all uploads by given music
func executeClientAction(url string, jsAction string) string { func executeClientAction(url string, jsAction string) string {
dir, err := ioutil.TempDir("", "chromedp-example") dir, err := ioutil.TempDir("", "chromedp-example")
if err != nil { utils.CheckErr(err)
panic(err)
}
defer os.RemoveAll(dir) defer os.RemoveAll(dir)
opts := append(chromedp.DefaultExecAllocatorOptions[:], opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.DisableGPU, chromedp.DisableGPU,
chromedp.UserDataDir(dir), chromedp.UserDataDir(dir),
chromedp.Flag("headless", !models.Config.Debug), chromedp.Flag("headless", !config.Config.Debug),
) )
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...) allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
@ -35,11 +33,18 @@ func executeClientAction(url string, jsAction string) string {
) )
defer cancel() defer cancel()
ctx, cancel = context.WithTimeout(ctx, time.Duration(models.Config.Deadline)*time.Second) ctx, cancel = context.WithTimeout(ctx, time.Duration(config.Config.Deadline)*time.Second)
defer cancel() defer cancel()
var jsOutput string var jsOutput string
err = chromedp.Run(ctx, jsOutput = runScrapeWithInfo(ctx, jsAction, url)
return jsOutput
}
func runScrapeQuiet(ctx context.Context, jsAction string, url string) string {
var jsOutput string
err := chromedp.Run(ctx,
// Navigate to user's page // Navigate to user's page
chromedp.Navigate(url), chromedp.Navigate(url),
// Execute url grabber script // Execute url grabber script
@ -50,9 +55,47 @@ func executeClientAction(url string, jsAction string) string {
// Grab url links from our element // Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput), chromedp.InnerHTML(`video_urls`, &jsOutput),
) )
if err != nil { utils.CheckErr(err)
log.Fatal(err) return jsOutput
}
func runScrapeWithInfo(ctx context.Context, jsAction string, url string) string {
var jsOutput string
err := chromedp.Run(ctx,
// Navigate to user's page
chromedp.Navigate(url),
// Execute url grabber script
chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
chromedp.EvaluateAsDevTools(jsAction, &jsOutput),
)
utils.CheckErr(err)
for {
err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.preloadCount.toString()", &jsOutput))
utils.CheckErr(err)
if jsOutput != "0" {
utils.Logf("\rPreloading... Currently loaded %s items.", jsOutput)
} else {
utils.Logf("\rPreloading...")
} }
err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.finished.toString()", &jsOutput))
utils.CheckErr(err)
if jsOutput == "true" {
break
}
time.Sleep(50 * time.Millisecond)
}
utils.Log("\nRetrieving items...")
err = chromedp.Run(ctx,
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
utils.CheckErr(err)
return jsOutput return jsOutput
} }

View File

@ -1,14 +1,14 @@
package main package main
import ( import (
models "./models" config "./models/config"
workflows "./workflows" workflows "./workflows"
) )
func main() { func main() {
models.GetConfig() config.GetConfig()
url := models.Config.URL url := config.Config.URL
batchFilePath := models.Config.BatchFilePath batchFilePath := config.Config.BatchFilePath
// Batch file // Batch file
if workflows.CanUseDownloadBatchFile(batchFilePath) { if workflows.CanUseDownloadBatchFile(batchFilePath) {

View File

@ -1,11 +1,9 @@
package models package config
import ( import (
"flag" "flag"
"fmt" "fmt"
"os" "os"
"regexp"
"strings"
) )
// Config - Runtime configuration // Config - Runtime configuration
@ -15,6 +13,7 @@ var Config struct {
BatchFilePath string BatchFilePath string
Debug bool Debug bool
MetaData bool MetaData bool
Quiet bool
Deadline int Deadline int
} }
@ -24,6 +23,7 @@ func GetConfig() {
batchFilePath := flag.String("batch-file", "", "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.") batchFilePath := flag.String("batch-file", "", "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.")
debug := flag.Bool("debug", false, "Enables debug mode") debug := flag.Bool("debug", false, "Enables debug mode")
metadata := flag.Bool("metadata", false, "Write video metadata to a .json file") metadata := flag.Bool("metadata", false, "Write video metadata to a .json file")
quiet := flag.Bool("quiet", false, "Supress output")
deadline := flag.Int("deadline", 1500, "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)") deadline := flag.Int("deadline", 1500, "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)")
flag.Parse() flag.Parse()
@ -43,24 +43,6 @@ func GetConfig() {
Config.BatchFilePath = *batchFilePath Config.BatchFilePath = *batchFilePath
Config.Debug = *debug Config.Debug = *debug
Config.MetaData = *metadata Config.MetaData = *metadata
Config.Quiet = *quiet
Config.Deadline = *deadline Config.Deadline = *deadline
} }
// GetUsername - Get's username from passed URL param
func GetUsername() string {
return GetUsernameFromString(Config.URL)
}
// GetUsernameFromString - Get's username from passed param
func GetUsernameFromString(str string) string {
if match := strings.Contains(str, "/"); !match { // Not url
return strings.Replace(str, "@", "", -1)
}
if match, _ := regexp.MatchString(".+tiktok\\.com/@.+", str); match { // URL
stripedSuffix := strings.Split(str, "@")[1]
return strings.Split(stripedSuffix, "/")[0]
}
panic("Could not recognise URL format")
}

View File

@ -1,8 +1,9 @@
package models package models
import ( import (
res "../resources"
utils "../utils"
"encoding/json" "encoding/json"
"fmt"
"os" "os"
"strings" "strings"
) )
@ -46,21 +47,16 @@ func (u Upload) GetUploadID() string {
func (u Upload) WriteToFile(outputPath string) { func (u Upload) WriteToFile(outputPath string) {
bytes, err := json.Marshal(u) bytes, err := json.Marshal(u)
if err != nil { if err != nil {
fmt.Printf("Could not serialize json for video: %s", u.GetUploadID()) utils.Logf(res.ErrorCouldNotSerializeJSON, u.GetUploadID())
fmt.Println()
panic(err) panic(err)
} }
// Create the file // Create the file
out, err := os.Create(outputPath) out, err := os.Create(outputPath)
if err != nil { utils.CheckErr(err)
panic(err)
}
defer out.Close() defer out.Close()
// Write to file // Write to file
_, err = out.Write(bytes) _, err = out.Write(bytes)
if err != nil { utils.CheckErr(err)
panic(err)
}
} }

View File

@ -3,7 +3,7 @@
"version": "0.0.1", "version": "0.0.1",
"scripts": { "scripts": {
"install-dependencies": "go get -v -t -d ./...", "install-dependencies": "go get -v -t -d ./...",
"test": "go test -v ./models", "test": "go test -v ./models && go test -v ./utils",
"clean": "rm -rf out", "clean": "rm -rf out",
"build:scraper": "node node_modules/terser/bin/terser -c -m -- scraper.js > out/scraper.js", "build:scraper": "node node_modules/terser/bin/terser -c -m -- scraper.js > out/scraper.js",
"build:app": "go build -o out/ -v .", "build:app": "go build -o out/ -v .",

10
resources/strings.go Normal file
View File

@ -0,0 +1,10 @@
package resources
// User-facing message templates. Every value is a fmt-style format string
// that takes a single %s argument.
var (
	// ErrorCouldNotSerializeJSON is reported when an upload cannot be
	// marshalled to JSON; %s receives the upload ID.
	ErrorCouldNotSerializeJSON = "Could not serialize json for video: %s\n"

	// ErrorCouldNotRecogniseURL is reported when an input string matches none
	// of the supported formats; %s receives the offending string.
	ErrorCouldNotRecogniseURL = "Could not recognise URL format of string %s"

	// ErrorPathNotFound is reported when a file path does not exist; %s
	// receives the path.
	ErrorPathNotFound = "File path %s not found."
)

View File

@ -27,10 +27,16 @@ optStrings = {
}, },
}; };
// Progress state read back by the Go client while the scraper runs:
//   preloadCount - updated with the feed item count each time scrolling finds new items
//   finished     - set to true once the result element has been appended to the page
currentState = {
preloadCount: 0,
finished: false,
};
createVidUrlElement = function(outputObj) { createVidUrlElement = function(outputObj) {
var urlSetElement = document.createElement(optStrings.tags.resultTag); var urlSetElement = document.createElement(optStrings.tags.resultTag);
urlSetElement.innerText = JSON.stringify(outputObj); urlSetElement.innerText = JSON.stringify(outputObj);
document.getElementsByTagName(optStrings.tags.resultParentTag)[0].appendChild(urlSetElement); document.getElementsByTagName(optStrings.tags.resultParentTag)[0].appendChild(urlSetElement);
currentState.finished = true;
} }
buldVidUrlArray = function(finishCallback) { buldVidUrlArray = function(finishCallback) {
@ -102,6 +108,7 @@ scrollWhileNew = function(finishCallback) {
var oldCount = state.count; var oldCount = state.count;
state.count = document.getElementsByClassName(optStrings.classes.feedVideoItem).length; state.count = document.getElementsByClassName(optStrings.classes.feedVideoItem).length;
if (oldCount !== state.count) { if (oldCount !== state.count) {
currentState.preloadCount = state.count;
window.scrollTo(0, document.body.scrollHeight); window.scrollTo(0, document.body.scrollHeight);
} else { } else {
if (document.querySelector(optStrings.selectors.feedLoading)) { if (document.querySelector(optStrings.selectors.feedLoading)) {

12
utils/checkErr.go Normal file
View File

@ -0,0 +1,12 @@
package utils
import (
"log"
)
// CheckErr is a no-op for a nil error; for any other error it logs the error
// and terminates the program through log.Fatal.
func CheckErr(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}

View File

@ -10,22 +10,15 @@ import (
func DownloadFile(outputPath string, url string) { func DownloadFile(outputPath string, url string) {
// Get the data // Get the data
resp, err := http.Get(url) resp, err := http.Get(url)
if err != nil { CheckErr(err)
panic(err)
}
defer resp.Body.Close() defer resp.Body.Close()
// Create the file // Create the file
out, err := os.Create(outputPath) out, err := os.Create(outputPath)
if err != nil { CheckErr(err)
panic(err)
}
defer out.Close() defer out.Close()
// Write the body to file // Write the body to file
_, err = io.Copy(out, resp.Body) _, err = io.Copy(out, resp.Body)
CheckErr(err)
if err != nil {
panic(err)
}
} }

View File

@ -37,9 +37,7 @@ func ReadFileToString(path string) string {
// ReadFileLineByLine - Reads file line by line and calls delegate // ReadFileLineByLine - Reads file line by line and calls delegate
func ReadFileLineByLine(path string, delegate delegateString) { func ReadFileLineByLine(path string, delegate delegateString) {
file, err := os.Open(path) file, err := os.Open(path)
if err != nil { CheckErr(err)
panic(err)
}
defer file.Close() defer file.Close()
scanner := bufio.NewScanner(file) scanner := bufio.NewScanner(file)

28
utils/getUsername.go Normal file
View File

@ -0,0 +1,28 @@
package utils
import (
config "../models/config"
res "../resources"
"fmt"
"regexp"
"strings"
)
// GetUsername - Get's username from passed URL param
func GetUsername() string {
return GetUsernameFromString(config.Config.URL)
}
// tiktokUserURLPattern matches strings that contain a "tiktok.com/@<name>"
// segment preceded by at least one character. It is compiled once at package
// initialisation rather than on every call.
var tiktokUserURLPattern = regexp.MustCompile(`.+tiktok\.com/@.+`)

// GetUsernameFromString extracts a username from str.
//
// A string without any "/" is treated as a bare handle and is returned with
// every "@" removed. A string containing "/" must match tiktokUserURLPattern;
// the username is then the text between the first "@" and the following "/"
// (or the end of the string).
//
// It panics with the ErrorCouldNotRecogniseURL message when str contains "/"
// but is not a recognised TikTok URL.
func GetUsernameFromString(str string) string {
	// Anything without a slash is a handle, not a URL.
	if !strings.Contains(str, "/") {
		return strings.ReplaceAll(str, "@", "")
	}

	if !tiktokUserURLPattern.MatchString(str) {
		panic(fmt.Sprintf(res.ErrorCouldNotRecogniseURL, str))
	}

	// The pattern guarantees at least one "@", so index 1 always exists.
	afterAt := strings.Split(str, "@")[1]
	return strings.Split(afterAt, "/")[0]
}

View File

@ -1,6 +1,7 @@
package models package utils
import ( import (
config "../models/config"
testUtil "../unitTestUtil" testUtil "../unitTestUtil"
"testing" "testing"
) )
@ -8,7 +9,7 @@ import (
func TestGetUsername(t *testing.T) { func TestGetUsername(t *testing.T) {
testCaseDelegate := func(t *testing.T, url string, username string) { testCaseDelegate := func(t *testing.T, url string, username string) {
tu := testUtil.TestUtil{T: t} tu := testUtil.TestUtil{T: t}
Config.URL = url config.Config.URL = url
actual := GetUsername() actual := GetUsername()
tu.AssertString(actual, username, "Username") tu.AssertString(actual, username, "Username")
} }

25
utils/log.go Normal file
View File

@ -0,0 +1,25 @@
package utils
import (
config "../models/config"
"fmt"
)
// Log prints its arguments to standard output in fmt.Println style. Nothing
// is printed when quiet mode (config.Config.Quiet) is enabled.
func Log(a ...interface{}) {
	if config.Config.Quiet {
		return
	}
	fmt.Println(a...)
}
// Logf prints formatted text to standard output in fmt.Printf style. Nothing
// is printed when quiet mode (config.Config.Quiet) is enabled.
func Logf(format string, a ...interface{}) {
	if config.Config.Quiet {
		return
	}
	fmt.Printf(format, a...)
}
// LogFatal builds a message from format and a in fmt.Sprintf style and panics
// with that message as the panic value. It does not consult the quiet setting.
func LogFatal(format string, a ...interface{}) {
	message := fmt.Sprintf(format, a...)
	panic(message)
}

View File

@ -2,14 +2,11 @@ package utils
import ( import (
"io/ioutil" "io/ioutil"
"log"
) )
// ReadFileAsString - Returns contents of given file // ReadFileAsString - Returns contents of given file
func ReadFileAsString(fileName string) string { func ReadFileAsString(fileName string) string {
content, err := ioutil.ReadFile(fileName) content, err := ioutil.ReadFile(fileName)
if err != nil { CheckErr(err)
log.Fatal(err)
}
return string(content) return string(content)
} }

View File

@ -1,8 +1,8 @@
package workflows package workflows
import ( import (
res "../resources"
utils "../utils" utils "../utils"
"fmt"
) )
// CanUseDownloadBatchFile - Check's if DownloadBatchFile can be used // CanUseDownloadBatchFile - Check's if DownloadBatchFile can be used
@ -13,7 +13,7 @@ func CanUseDownloadBatchFile(batchFilePath string) bool {
// DownloadBatchFile - Download items from batch file // DownloadBatchFile - Download items from batch file
func DownloadBatchFile(batchFilePath string) { func DownloadBatchFile(batchFilePath string) {
if !utils.CheckIfExists(batchFilePath) { if !utils.CheckIfExists(batchFilePath) {
panic(fmt.Sprintf("File path %s not found.", batchFilePath)) utils.LogFatal(res.ErrorPathNotFound, batchFilePath)
} }
utils.ReadFileLineByLine(batchFilePath, downloadItem) utils.ReadFileLineByLine(batchFilePath, downloadItem)

View File

@ -2,7 +2,7 @@ package workflows
import ( import (
client "../client" client "../client"
models "../models" config "../models/config"
utils "../utils" utils "../utils"
"fmt" "fmt"
"regexp" "regexp"
@ -17,12 +17,15 @@ func CanUseDownloadMusic(url string) bool {
// DownloadMusic - Download all videos by given music // DownloadMusic - Download all videos by given music
func DownloadMusic(url string) { func DownloadMusic(url string) {
uploads := client.GetMusicUploads(url) uploads := client.GetMusicUploads(url)
uploadCount := len(uploads)
for _, upload := range uploads { for index, upload := range uploads {
username := models.GetUsernameFromString(upload.Uploader) username := utils.GetUsernameFromString(upload.Uploader)
downloadDir := fmt.Sprintf("%s/%s", models.Config.OutputPath, username) downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
utils.InitOutputDirectory(downloadDir) utils.InitOutputDirectory(downloadDir)
downloadVideo(upload, downloadDir) downloadVideo(upload, downloadDir)
utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
} }
utils.Log()
} }

View File

@ -2,7 +2,7 @@ package workflows
import ( import (
client "../client" client "../client"
models "../models" config "../models/config"
utils "../utils" utils "../utils"
"fmt" "fmt"
"strings" "strings"
@ -16,12 +16,15 @@ func CanUseDownloadUser(url string) bool {
// DownloadUser - Download all user's videos // DownloadUser - Download all user's videos
func DownloadUser(username string) { func DownloadUser(username string) {
downloadDir := fmt.Sprintf("%s/%s", models.Config.OutputPath, username)
uploads := client.GetUserUploads(username) uploads := client.GetUserUploads(username)
uploadCount := len(uploads)
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
utils.InitOutputDirectory(downloadDir) utils.InitOutputDirectory(downloadDir)
for _, upload := range uploads { for index, upload := range uploads {
downloadVideo(upload, downloadDir) downloadVideo(upload, downloadDir)
utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
} }
utils.Log()
} }

View File

@ -3,6 +3,7 @@ package workflows
import ( import (
client "../client" client "../client"
models "../models" models "../models"
config "../models/config"
utils "../utils" utils "../utils"
"fmt" "fmt"
"regexp" "regexp"
@ -16,12 +17,13 @@ func CanUseDownloadSingleVideo(url string) bool {
// DownloadSingleVideo - Downloads single video // DownloadSingleVideo - Downloads single video
func DownloadSingleVideo(url string) { func DownloadSingleVideo(url string) {
username := models.GetUsernameFromString(url) username := utils.GetUsernameFromString(url)
upload := client.GetVideoDetails(url) upload := client.GetVideoDetails(url)
downloadDir := fmt.Sprintf("%s/%s", models.Config.OutputPath, username) downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
utils.InitOutputDirectory(downloadDir) utils.InitOutputDirectory(downloadDir)
downloadVideo(upload, downloadDir) downloadVideo(upload, downloadDir)
utils.Log("[1/1] Downloaded\n")
} }
// DownloadVideo - Downloads one video // DownloadVideo - Downloads one video
@ -30,14 +32,12 @@ func downloadVideo(upload models.Upload, downloadDir string) {
downloadPath := fmt.Sprintf("%s/%s.mp4", downloadDir, uploadID) downloadPath := fmt.Sprintf("%s/%s.mp4", downloadDir, uploadID)
if utils.CheckIfExists(downloadPath) { if utils.CheckIfExists(downloadPath) {
fmt.Println("Upload '" + uploadID + "' already downloaded, skipping")
return return
} }
fmt.Println("Downloading upload item '" + uploadID + "' to " + downloadPath)
utils.DownloadFile(downloadPath, upload.URL) utils.DownloadFile(downloadPath, upload.URL)
if models.Config.MetaData { if config.Config.MetaData {
metadataPath := fmt.Sprintf("%s/%s.json", downloadDir, uploadID) metadataPath := fmt.Sprintf("%s/%s.json", downloadDir, uploadID)
upload.WriteToFile(metadataPath) upload.WriteToFile(metadataPath)
} }

View File

@ -1,8 +1,8 @@
package workflows package workflows
import ( import (
models "../models" res "../resources"
"fmt" utils "../utils"
) )
// StartWorkflowByParameter - Start needed workflow by given parameter // StartWorkflowByParameter - Start needed workflow by given parameter
@ -22,9 +22,9 @@ func StartWorkflowByParameter(url string) {
// Tiktok user // Tiktok user
if CanUseDownloadUser(url) { if CanUseDownloadUser(url) {
DownloadUser(models.GetUsernameFromString(url)) DownloadUser(utils.GetUsernameFromString(url))
return return
} }
panic(fmt.Sprintf("Could not recognise URL format of string %s", url)) utils.LogFatal(res.ErrorCouldNotRecogniseURL, url)
} }