diff --git a/.gitignore b/.gitignore index 178d90c..54b4f6a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ downloads *.exe tiktok-dl batch_file.txt +debug.log diff --git a/README.md b/README.md index dd21110..3b66933 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ Clone this repository and run `go build` to build the executable. * `-metadata` - Write video metadata to a .json file * `-batch-file` - File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored. * `-deadline` - Sets the timout for scraper logic in seconds (used as a workaround for context deadline exceeded error) (default 1500) +* `-quiet` - Suppress output ## Acknowledgments This software uses the **chromedp** for web scraping, it can be found here: https://github.com/chromedp/chromedp \ diff --git a/client/executeClientAction.go b/client/executeClientAction.go index 98745b3..74fee36 100644 --- a/client/executeClientAction.go +++ b/client/executeClientAction.go @@ -8,22 +8,20 @@ import ( "os" "time" - models "../models" + config "../models/config" utils "../utils" ) // GetMusicUploads - Get all uploads by given music func executeClientAction(url string, jsAction string) string { dir, err := ioutil.TempDir("", "chromedp-example") - if err != nil { - panic(err) - } + utils.CheckErr(err) defer os.RemoveAll(dir) opts := append(chromedp.DefaultExecAllocatorOptions[:], chromedp.DisableGPU, chromedp.UserDataDir(dir), - chromedp.Flag("headless", !models.Config.Debug), + chromedp.Flag("headless", !config.Config.Debug), ) allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...) 
@@ -35,11 +33,18 @@ func executeClientAction(url string, jsAction string) string { ) defer cancel() - ctx, cancel = context.WithTimeout(ctx, time.Duration(models.Config.Deadline)*time.Second) + ctx, cancel = context.WithTimeout(ctx, time.Duration(config.Config.Deadline)*time.Second) defer cancel() var jsOutput string - err = chromedp.Run(ctx, + jsOutput = runScrapeWithInfo(ctx, jsAction, url) + + return jsOutput +} + +func runScrapeQuiet(ctx context.Context, jsAction string, url string) string { + var jsOutput string + err := chromedp.Run(ctx, // Navigate to user's page chromedp.Navigate(url), // Execute url grabber script @@ -50,9 +55,47 @@ func executeClientAction(url string, jsAction string) string { // Grab url links from our element chromedp.InnerHTML(`video_urls`, &jsOutput), ) - if err != nil { - log.Fatal(err) + utils.CheckErr(err) + return jsOutput +} + +func runScrapeWithInfo(ctx context.Context, jsAction string, url string) string { + var jsOutput string + err := chromedp.Run(ctx, + // Navigate to user's page + chromedp.Navigate(url), + // Execute url grabber script + chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput), + chromedp.EvaluateAsDevTools(jsAction, &jsOutput), + ) + utils.CheckErr(err) + + for { + err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.preloadCount.toString()", &jsOutput)) + utils.CheckErr(err) + if jsOutput != "0" { + utils.Logf("\rPreloading... 
Currently loaded %s items.", jsOutput) + } else { + utils.Logf("\rPreloading...") + } + + err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.finished.toString()", &jsOutput)) + utils.CheckErr(err) + if jsOutput == "true" { + break + } + + time.Sleep(50 * time.Millisecond) } + utils.Log("\nRetrieving items...") + err = chromedp.Run(ctx, + // Wait until custom js finishes + chromedp.WaitVisible(`video_urls`), + // Grab url links from our element + chromedp.InnerHTML(`video_urls`, &jsOutput), + ) + utils.CheckErr(err) + return jsOutput } diff --git a/main.go b/main.go index 9b7a2f1..681f6fe 100644 --- a/main.go +++ b/main.go @@ -1,14 +1,14 @@ package main import ( - models "./models" + config "./models/config" workflows "./workflows" ) func main() { - models.GetConfig() - url := models.Config.URL - batchFilePath := models.Config.BatchFilePath + config.GetConfig() + url := config.Config.URL + batchFilePath := config.Config.BatchFilePath // Batch file if workflows.CanUseDownloadBatchFile(batchFilePath) { diff --git a/models/config.go b/models/config/config.go similarity index 68% rename from models/config.go rename to models/config/config.go index 40fc4dc..773c9ca 100644 --- a/models/config.go +++ b/models/config/config.go @@ -1,11 +1,9 @@ -package models +package config import ( "flag" "fmt" "os" - "regexp" - "strings" ) // Config - Runtime configuration @@ -15,6 +13,7 @@ var Config struct { BatchFilePath string Debug bool MetaData bool + Quiet bool Deadline int } @@ -24,6 +23,7 @@ func GetConfig() { batchFilePath := flag.String("batch-file", "", "File containing URLs/Usernames to download, one value per line. 
Lines starting with '#', are considered as comments and ignored.") debug := flag.Bool("debug", false, "Enables debug mode") metadata := flag.Bool("metadata", false, "Write video metadata to a .json file") + quiet := flag.Bool("quiet", false, "Supress output") deadline := flag.Int("deadline", 1500, "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)") flag.Parse() @@ -43,24 +43,6 @@ func GetConfig() { Config.BatchFilePath = *batchFilePath Config.Debug = *debug Config.MetaData = *metadata + Config.Quiet = *quiet Config.Deadline = *deadline } - -// GetUsername - Get's username from passed URL param -func GetUsername() string { - return GetUsernameFromString(Config.URL) -} - -// GetUsernameFromString - Get's username from passed param -func GetUsernameFromString(str string) string { - if match := strings.Contains(str, "/"); !match { // Not url - return strings.Replace(str, "@", "", -1) - } - - if match, _ := regexp.MatchString(".+tiktok\\.com/@.+", str); match { // URL - stripedSuffix := strings.Split(str, "@")[1] - return strings.Split(stripedSuffix, "/")[0] - } - - panic("Could not recognise URL format") -} diff --git a/models/upload.go b/models/upload.go index 09e579d..3f45cc7 100644 --- a/models/upload.go +++ b/models/upload.go @@ -1,8 +1,9 @@ package models import ( + res "../resources" + utils "../utils" "encoding/json" - "fmt" "os" "strings" ) @@ -46,21 +47,16 @@ func (u Upload) GetUploadID() string { func (u Upload) WriteToFile(outputPath string) { bytes, err := json.Marshal(u) if err != nil { - fmt.Printf("Could not serialize json for video: %s", u.GetUploadID()) - fmt.Println() + utils.Logf(res.ErrorCouldNotSerializeJSON, u.GetUploadID()) panic(err) } // Create the file out, err := os.Create(outputPath) - if err != nil { - panic(err) - } + utils.CheckErr(err) defer out.Close() // Write to file _, err = out.Write(bytes) - if err != nil { - panic(err) - } + utils.CheckErr(err) } diff --git a/package.json 
b/package.json index 7a09164..df78bd1 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "version": "0.0.1", "scripts": { "install-dependencies": "go get -v -t -d ./...", - "test": "go test -v ./models", + "test": "go test -v ./models && go test -v ./utils", "clean": "rm -rf out", "build:scraper": "node node_modules/terser/bin/terser -c -m -- scraper.js > out/scraper.js", "build:app": "go build -o out/ -v .", diff --git a/resources/strings.go b/resources/strings.go new file mode 100644 index 0000000..6438c07 --- /dev/null +++ b/resources/strings.go @@ -0,0 +1,10 @@ +package resources + +// ErrorCouldNotSerializeJSON - +var ErrorCouldNotSerializeJSON = "Could not serialize json for video: %s\n" + +// ErrorCouldNotRecogniseURL - +var ErrorCouldNotRecogniseURL = "Could not recognise URL format of string %s" + +// ErrorPathNotFound - +var ErrorPathNotFound = "File path %s not found." diff --git a/scraper.js b/scraper.js index d174c2a..e45a9b6 100644 --- a/scraper.js +++ b/scraper.js @@ -27,10 +27,16 @@ optStrings = { }, }; +currentState = { + preloadCount: 0, + finished: false, +}; + createVidUrlElement = function(outputObj) { var urlSetElement = document.createElement(optStrings.tags.resultTag); urlSetElement.innerText = JSON.stringify(outputObj); document.getElementsByTagName(optStrings.tags.resultParentTag)[0].appendChild(urlSetElement); + currentState.finished = true; } buldVidUrlArray = function(finishCallback) { @@ -102,6 +108,7 @@ scrollWhileNew = function(finishCallback) { var oldCount = state.count; state.count = document.getElementsByClassName(optStrings.classes.feedVideoItem).length; if (oldCount !== state.count) { + currentState.preloadCount = state.count; window.scrollTo(0, document.body.scrollHeight); } else { if (document.querySelector(optStrings.selectors.feedLoading)) { diff --git a/utils/checkErr.go b/utils/checkErr.go new file mode 100644 index 0000000..e933107 --- /dev/null +++ b/utils/checkErr.go @@ -0,0 +1,12 @@ +package utils + +import 
( + "log" +) + +// CheckErr - Logs the error and exits if err is not nil +func CheckErr(err error) { + if err != nil { + log.Fatal(err) + } +} diff --git a/utils/downloadFile.go b/utils/downloadFile.go index 34d97c5..c7a2654 100644 --- a/utils/downloadFile.go +++ b/utils/downloadFile.go @@ -10,22 +10,15 @@ import ( func DownloadFile(outputPath string, url string) { // Get the data resp, err := http.Get(url) - if err != nil { - panic(err) - } + CheckErr(err) defer resp.Body.Close() // Create the file out, err := os.Create(outputPath) - if err != nil { - panic(err) - } + CheckErr(err) defer out.Close() // Write the body to file _, err = io.Copy(out, resp.Body) - - if err != nil { - panic(err) - } + CheckErr(err) } diff --git a/utils/fileio.go b/utils/fileio.go index 0a59e98..f8b9c6a 100644 --- a/utils/fileio.go +++ b/utils/fileio.go @@ -37,9 +37,7 @@ func ReadFileToString(path string) string { // ReadFileLineByLine - Reads file line by line and calls delegate func ReadFileLineByLine(path string, delegate delegateString) { file, err := os.Open(path) - if err != nil { - panic(err) - } + CheckErr(err) defer file.Close() scanner := bufio.NewScanner(file) diff --git a/utils/getUsername.go b/utils/getUsername.go new file mode 100644 index 0000000..1d970a0 --- /dev/null +++ b/utils/getUsername.go @@ -0,0 +1,28 @@ +package utils + +import ( + config "../models/config" + res "../resources" + "fmt" + "regexp" + "strings" +) + +// GetUsername - Gets username from passed URL param +func GetUsername() string { + return GetUsernameFromString(config.Config.URL) +} + +// GetUsernameFromString - Gets username from passed param +func GetUsernameFromString(str string) string { + if match := strings.Contains(str, "/"); !match { // Not url + return strings.Replace(str, "@", "", -1) + } + + if match, _ := regexp.MatchString(".+tiktok\\.com/@.+", str); match { // URL + stripedSuffix := strings.Split(str, "@")[1] + return strings.Split(stripedSuffix, "/")[0] + } + + 
panic(fmt.Sprintf(res.ErrorCouldNotRecogniseURL, str)) +} diff --git a/models/config_test.go b/utils/getUsername_test.go similarity index 93% rename from models/config_test.go rename to utils/getUsername_test.go index 3a0474e..e60c9bf 100644 --- a/models/config_test.go +++ b/utils/getUsername_test.go @@ -1,6 +1,7 @@ -package models +package utils import ( + config "../models/config" testUtil "../unitTestUtil" "testing" ) @@ -8,7 +9,7 @@ import ( func TestGetUsername(t *testing.T) { testCaseDelegate := func(t *testing.T, url string, username string) { tu := testUtil.TestUtil{T: t} - Config.URL = url + config.Config.URL = url actual := GetUsername() tu.AssertString(actual, username, "Username") } diff --git a/utils/log.go b/utils/log.go new file mode 100644 index 0000000..6d385f2 --- /dev/null +++ b/utils/log.go @@ -0,0 +1,25 @@ +package utils + +import ( + config "../models/config" + "fmt" +) + +// Log - Write to std out +func Log(a ...interface{}) { + if !config.Config.Quiet { + fmt.Println(a...) + } +} + +// Logf - Write formatted text +func Logf(format string, a ...interface{}) { + if !config.Config.Quiet { + fmt.Printf(format, a...) 
+ } +} + +// LogFatal - Write error and panic +func LogFatal(format string, a ...interface{}) { + panic(fmt.Sprintf(format, a...)) +} diff --git a/utils/readFileAsString.go b/utils/readFileAsString.go index 60f2e46..45bcda3 100644 --- a/utils/readFileAsString.go +++ b/utils/readFileAsString.go @@ -2,14 +2,11 @@ package utils import ( "io/ioutil" - "log" ) // ReadFileAsString - Returns contents of given file func ReadFileAsString(fileName string) string { content, err := ioutil.ReadFile(fileName) - if err != nil { - log.Fatal(err) - } + CheckErr(err) return string(content) } diff --git a/workflows/downloadBatchFile.go b/workflows/downloadBatchFile.go index 989dc83..d44084b 100644 --- a/workflows/downloadBatchFile.go +++ b/workflows/downloadBatchFile.go @@ -1,8 +1,8 @@ package workflows import ( + res "../resources" utils "../utils" - "fmt" ) // CanUseDownloadBatchFile - Check's if DownloadBatchFile can be used @@ -13,7 +13,7 @@ func CanUseDownloadBatchFile(batchFilePath string) bool { // DownloadBatchFile - Download items from batch file func DownloadBatchFile(batchFilePath string) { if !utils.CheckIfExists(batchFilePath) { - panic(fmt.Sprintf("File path %s not found.", batchFilePath)) + utils.LogFatal(res.ErrorPathNotFound, batchFilePath) } utils.ReadFileLineByLine(batchFilePath, downloadItem) diff --git a/workflows/downloadMusic.go b/workflows/downloadMusic.go index 09cba06..8fecd37 100644 --- a/workflows/downloadMusic.go +++ b/workflows/downloadMusic.go @@ -2,7 +2,7 @@ package workflows import ( client "../client" - models "../models" + config "../models/config" utils "../utils" "fmt" "regexp" @@ -17,12 +17,15 @@ func CanUseDownloadMusic(url string) bool { // DownloadMusic - Download all videos by given music func DownloadMusic(url string) { uploads := client.GetMusicUploads(url) + uploadCount := len(uploads) - for _, upload := range uploads { - username := models.GetUsernameFromString(upload.Uploader) - downloadDir := fmt.Sprintf("%s/%s", 
models.Config.OutputPath, username) + for index, upload := range uploads { + username := utils.GetUsernameFromString(upload.Uploader) + downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username) utils.InitOutputDirectory(downloadDir) downloadVideo(upload, downloadDir) + utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount) } + utils.Log() } diff --git a/workflows/downloadUser.go b/workflows/downloadUser.go index 1443620..980921c 100644 --- a/workflows/downloadUser.go +++ b/workflows/downloadUser.go @@ -2,7 +2,7 @@ package workflows import ( client "../client" - models "../models" + config "../models/config" utils "../utils" "fmt" "strings" @@ -16,12 +16,15 @@ func CanUseDownloadUser(url string) bool { // DownloadUser - Download all user's videos func DownloadUser(username string) { - downloadDir := fmt.Sprintf("%s/%s", models.Config.OutputPath, username) uploads := client.GetUserUploads(username) + uploadCount := len(uploads) + downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username) utils.InitOutputDirectory(downloadDir) - for _, upload := range uploads { + for index, upload := range uploads { downloadVideo(upload, downloadDir) + utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount) } + utils.Log() } diff --git a/workflows/downloadVideo.go b/workflows/downloadVideo.go index 27da001..e538db1 100644 --- a/workflows/downloadVideo.go +++ b/workflows/downloadVideo.go @@ -3,6 +3,7 @@ package workflows import ( client "../client" models "../models" + config "../models/config" utils "../utils" "fmt" "regexp" @@ -16,12 +17,13 @@ func CanUseDownloadSingleVideo(url string) bool { // DownloadSingleVideo - Downloads single video func DownloadSingleVideo(url string) { - username := models.GetUsernameFromString(url) + username := utils.GetUsernameFromString(url) upload := client.GetVideoDetails(url) - downloadDir := fmt.Sprintf("%s/%s", models.Config.OutputPath, username) + downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username) 
utils.InitOutputDirectory(downloadDir) downloadVideo(upload, downloadDir) + utils.Log("[1/1] Downloaded\n") } // DownloadVideo - Downloads one video @@ -30,14 +32,12 @@ func downloadVideo(upload models.Upload, downloadDir string) { downloadPath := fmt.Sprintf("%s/%s.mp4", downloadDir, uploadID) if utils.CheckIfExists(downloadPath) { - fmt.Println("Upload '" + uploadID + "' already downloaded, skipping") return } - fmt.Println("Downloading upload item '" + uploadID + "' to " + downloadPath) utils.DownloadFile(downloadPath, upload.URL) - if models.Config.MetaData { + if config.Config.MetaData { metadataPath := fmt.Sprintf("%s/%s.json", downloadDir, uploadID) upload.WriteToFile(metadataPath) } diff --git a/workflows/startWorkflowByParameter.go b/workflows/startWorkflowByParameter.go index 6d990d4..f3af3df 100644 --- a/workflows/startWorkflowByParameter.go +++ b/workflows/startWorkflowByParameter.go @@ -1,8 +1,8 @@ package workflows import ( - models "../models" - "fmt" + res "../resources" + utils "../utils" ) // StartWorkflowByParameter - Start needed workflow by given parameter @@ -22,9 +22,9 @@ func StartWorkflowByParameter(url string) { // Tiktok user if CanUseDownloadUser(url) { - DownloadUser(models.GetUsernameFromString(url)) + DownloadUser(utils.GetUsernameFromString(url)) return } - panic(fmt.Sprintf("Could not recognise URL format of string %s", url)) + utils.LogFatal(res.ErrorCouldNotRecogniseURL, url) }