From f9d35e3bf2f731cab74c06e0e6d5253343b33847 Mon Sep 17 00:00:00 2001 From: Pijus Kamandulis Date: Sun, 22 Mar 2020 02:10:24 +0200 Subject: [PATCH] TTDL-7 Added flag; Code clean up --- README.md | 9 +++-- client/executeClientAction.go | 13 ++++--- client/getHashtagUploads.go | 12 +++--- client/getMusicUploads.go | 9 +++-- client/getUserUploads.go | 9 +++-- models/config/config.go | 25 +++++++------ models/upload.go | 12 +++--- utils/archive.go | 53 +++++++++++++++++++++++++++ utils/{ => checkErr}/checkErr.go | 0 utils/downloadFile.go | 8 ++-- utils/fileio.go | 15 +++++++- utils/{ => log}/log.go | 3 +- utils/readFileAsString.go | 4 +- workflows/downloadBatchFile.go | 3 +- workflows/downloadHashtag.go | 22 +++++++---- workflows/downloadMusic.go | 21 +++++++---- workflows/downloadUser.go | 24 +++++++----- workflows/downloadVideo.go | 16 ++++++-- workflows/startWorkflowByParameter.go | 9 +++-- 19 files changed, 188 insertions(+), 79 deletions(-) create mode 100644 utils/archive.go rename utils/{ => checkErr}/checkErr.go (100%) rename utils/{ => log}/log.go (94%) diff --git a/README.md b/README.md index 4c0b208..f76515f 100644 --- a/README.md +++ b/README.md @@ -16,14 +16,15 @@ You can download items listed in a text file by running `./tiktok-dl [OPTIONS] - Clone this repository and run `go build` to build the executable. ## Available options -* `-debug` - enables debug mode -* `-output some_directory` - Output path (default "./downloads") -* `-metadata` - Write video metadata to a .json file +* `-archive` - Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it. * `-batch-file` - File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored. * `-deadline` - Sets the timout for scraper logic in seconds (used as a workaround for context deadline exceeded error) (default 1500) -* `-quiet` - Supress output +* `-debug` - enables debug mode * `-json` - Returns whole data, that was scraped from TikTok, in json * `-limit` - Sets the max count of video that will be downloaded (default infinity) +* `-metadata` - Write video metadata to a .json file +* `-output some_directory` - Output path (default "./downloads") +* `-quiet` - Supress output ## Acknowledgments This software uses the **chromedp** for web scraping, it can be found here: https://github.com/chromedp/chromedp \ diff --git a/client/executeClientAction.go b/client/executeClientAction.go index cb7bddc..9d0e295 100644 --- a/client/executeClientAction.go +++ b/client/executeClientAction.go @@ -3,15 +3,16 @@ package client import ( "context" "errors" - "github.com/chromedp/chromedp" "io/ioutil" - "log" "os" "strings" "time" + "github.com/chromedp/chromedp" + config "../models/config" utils "../utils" + log "../utils/log" ) // GetMusicUploads - Get all uploads by given music @@ -33,7 +34,7 @@ func executeClientAction(url string, jsAction string) (string, error) { ctx, cancel := chromedp.NewContext( allocCtx, - chromedp.WithLogf(log.Printf), + chromedp.WithLogf(log.Logf), ) defer cancel() @@ -84,9 +85,9 @@ func runScrapeWithInfo(ctx context.Context, jsAction string, url string) (string } if jsOutput != "0" { - utils.Logf("\rPreloading... %s items have been founded.", jsOutput) + log.Logf("\rPreloading... %s items have been found.", jsOutput) } else { - utils.Logf("\rPreloading...") + log.Logf("\rPreloading...") } if err := chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.finished.toString()", &jsOutput)); err != nil { @@ -100,7 +101,7 @@ func runScrapeWithInfo(ctx context.Context, jsAction string, url string) (string time.Sleep(50 * time.Millisecond) } - utils.Log("\nRetrieving items...") + log.Log("\nRetrieving items...") if err := chromedp.Run(ctx, // Wait until custom js finishes chromedp.WaitVisible(`video_urls`), diff --git a/client/getHashtagUploads.go b/client/getHashtagUploads.go index 9c01981..4d26abc 100644 --- a/client/getHashtagUploads.go +++ b/client/getHashtagUploads.go @@ -1,22 +1,24 @@ package client import ( + "fmt" + models "../models" config "../models/config" - "fmt" ) -// GetUserUploads - Get all uploads marked with given hashtag +// GetHashtagUploads - Get all uploads marked with given hashtag func GetHashtagUploads(hashtagURL string) ([]models.Upload, error) { - jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit) - actionOutput, err := executeClientAction(hashtagURL, jsMethod) + actionOutput, err := GetHashtagUploadsJSON(hashtagURL) if err != nil { return nil, err } + return models.ParseUploads(actionOutput), nil } -func GetHashtagUploadsJson(hashtagURL string) (string, error) { +// GetHashtagUploadsJSON - Get hashtag uploads scrape +func GetHashtagUploadsJSON(hashtagURL string) (string, error) { jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit) actionOutput, err := executeClientAction(hashtagURL, jsMethod) if err != nil { diff --git a/client/getMusicUploads.go b/client/getMusicUploads.go index 320b33f..5b98cfe 100644 --- a/client/getMusicUploads.go +++ b/client/getMusicUploads.go @@ -1,22 +1,23 @@ package client import ( + "fmt" + models "../models" config "../models/config" - "fmt" ) // GetMusicUploads - Get all uploads by given music func GetMusicUploads(url string) ([]models.Upload, error) { - jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit) - actionOutput, err := executeClientAction(url, jsMethod) + actionOutput, err := GetMusicUploadsJSON(url) if err != nil { return nil, err } return models.ParseUploads(actionOutput), nil } -func GetMusicUploadsJson(url string) (string, error) { +// GetMusicUploadsJSON - Get music uploads scrape +func GetMusicUploadsJSON(url string) (string, error) { jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit) actionOutput, err := executeClientAction(url, jsMethod) if err != nil { diff --git a/client/getUserUploads.go b/client/getUserUploads.go index 3752492..09b0f4d 100644 --- a/client/getUserUploads.go +++ b/client/getUserUploads.go @@ -1,22 +1,23 @@ package client import ( + "fmt" + models "../models" config "../models/config" - "fmt" ) // GetUserUploads - Get all uploads by user func GetUserUploads(username string) ([]models.Upload, error) { - jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit) - actionOutput, err := executeClientAction(`https://www.tiktok.com/@`+username, jsMethod) + actionOutput, err := GetUserUploadsJSON(username) if err != nil { return nil, err } return models.ParseUploads(actionOutput), nil } -func GetUserUploadsJson(username string) (string, error) { +// GetUserUploadsJSON - Get user uploads scrape +func GetUserUploadsJSON(username string) (string, error) { jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit) actionOutput, err := executeClientAction(`https://www.tiktok.com/@`+username, jsMethod) if err != nil { diff --git a/models/config/config.go b/models/config/config.go index 3eb4ea1..38fa0fd 100644 --- a/models/config/config.go +++ b/models/config/config.go @@ -8,27 +8,29 @@ import ( // Config - Runtime configuration var Config struct { - URL string - OutputPath string - BatchFilePath string - Debug bool - MetaData bool - Quiet bool - Deadline int - Limit int - JSONOnly bool + URL string + OutputPath string + BatchFilePath string + ArchiveFilePath string + Debug bool + MetaData bool + Quiet bool + JSONOnly bool + Deadline int + Limit int } // GetConfig - Returns Config object func GetConfig() { outputPath := flag.String("output", "./downloads", "Output path") batchFilePath := flag.String("batch-file", "", "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.") + archive := flag.String("archive", "", "Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.") debug := flag.Bool("debug", false, "Enables debug mode") metadata := flag.Bool("metadata", false, "Write video metadata to a .json file") quiet := flag.Bool("quiet", false, "Supress output") + jsonOnly := flag.Bool("json", false, "Just get JSON data from scraper (without video downloading)") deadline := flag.Int("deadline", 1500, "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)") limit := flag.Int("limit", 0, "Sets the videos count limit (useful when there too many videos from the user or by hashtag)") - jsonOnly := flag.Bool("json", false, "Just get JSON data from scraper (without video downloading)") flag.Parse() args := flag.Args() @@ -45,13 +47,14 @@ func GetConfig() { } Config.OutputPath = *outputPath Config.BatchFilePath = *batchFilePath + Config.ArchiveFilePath = *archive Config.Debug = *debug Config.MetaData = *metadata Config.Quiet = *quiet if *jsonOnly { Config.Quiet = true } + Config.JSONOnly = *jsonOnly Config.Deadline = *deadline Config.Limit = *limit - Config.JSONOnly = *jsonOnly } diff --git a/models/upload.go b/models/upload.go index 3f45cc7..40930d1 100644 --- a/models/upload.go +++ b/models/upload.go @@ -1,11 +1,13 @@ package models import ( - res "../resources" - utils "../utils" "encoding/json" "os" "strings" + + res "../resources" + checkErr "../utils/checkErr" + log "../utils/log" ) // Upload - Upload object @@ -47,16 +49,16 @@ func (u Upload) GetUploadID() string { func (u Upload) WriteToFile(outputPath string) { bytes, err := json.Marshal(u) if err != nil { - utils.Logf(res.ErrorCouldNotSerializeJSON, u.GetUploadID()) + log.Logf(res.ErrorCouldNotSerializeJSON, u.GetUploadID()) panic(err) } // Create the file out, err := os.Create(outputPath) - utils.CheckErr(err) + checkErr.CheckErr(err) defer out.Close() // Write to file _, err = out.Write(bytes) - utils.CheckErr(err) + checkErr.CheckErr(err) } diff --git a/utils/archive.go b/utils/archive.go new file mode 100644 index 0000000..363ff26 --- /dev/null +++ b/utils/archive.go @@ -0,0 +1,53 @@ +package utils + +import ( + models "../models" + config "../models/config" + log "./log" +) + +// IsItemInArchive - Checks if the item is already archived +func IsItemInArchive(upload models.Upload) bool { + if len(RemoveArchivedItems([]models.Upload{upload})) == 0 { + return true + } + return false +} + +// RemoveArchivedItems - Returns items slice without archived items +func RemoveArchivedItems(uploads []models.Upload) []models.Upload { + archiveFilePath := config.Config.ArchiveFilePath + + if archiveFilePath == "" || !CheckIfExists(archiveFilePath) { + return uploads + } + + removeArchivedItemsDelegate := func(archivedItem string) { + for i, upload := range uploads { + if upload.GetUploadID() == archivedItem { + uploads = append(uploads[:i], uploads[i+1:]...) + } + } + } + + lenBeforeRemoval := len(uploads) + ReadFileLineByLine(archiveFilePath, removeArchivedItemsDelegate) + + removedCount := lenBeforeRemoval - len(uploads) + if removedCount > 0 { + log.Logf("%d items, found in archive. Skipping...\n", removedCount) + } + + return uploads +} + +// AddItemToArchive - Adds item to archived list +func AddItemToArchive(uploadID string) { + archiveFilePath := config.Config.ArchiveFilePath + + if archiveFilePath == "" { + return + } + + AppendToFile(uploadID, archiveFilePath) +} diff --git a/utils/checkErr.go b/utils/checkErr/checkErr.go similarity index 100% rename from utils/checkErr.go rename to utils/checkErr/checkErr.go diff --git a/utils/downloadFile.go b/utils/downloadFile.go index c7a2654..07c964d 100644 --- a/utils/downloadFile.go +++ b/utils/downloadFile.go @@ -4,21 +4,23 @@ import ( "io" "net/http" "os" + + checkErr "./checkErr" ) // DownloadFile - Downloads content from `url` and stores it in `outputPath` func DownloadFile(outputPath string, url string) { // Get the data resp, err := http.Get(url) - CheckErr(err) + checkErr.CheckErr(err) defer resp.Body.Close() // Create the file out, err := os.Create(outputPath) - CheckErr(err) + checkErr.CheckErr(err) defer out.Close() // Write the body to file _, err = io.Copy(out, resp.Body) - CheckErr(err) + checkErr.CheckErr(err) } diff --git a/utils/fileio.go b/utils/fileio.go index f8b9c6a..85b72cc 100644 --- a/utils/fileio.go +++ b/utils/fileio.go @@ -4,6 +4,8 @@ import ( "bufio" "io/ioutil" "os" + + checkErr "./checkErr" ) type delegateString func(string) @@ -37,7 +39,7 @@ func ReadFileToString(path string) string { // ReadFileLineByLine - Reads file line by line and calls delegate func ReadFileLineByLine(path string, delegate delegateString) { file, err := os.Open(path) - CheckErr(err) + checkErr.CheckErr(err) defer file.Close() scanner := bufio.NewScanner(file) @@ -49,3 +51,14 @@ func ReadFileLineByLine(path string, delegate delegateString) { panic(err) } } + +// AppendToFile - Appends line to file +func AppendToFile(str string, filePath string) { + f, err := os.OpenFile(filePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + checkErr.CheckErr(err) + + defer f.Close() + if _, err := f.WriteString(str + "\n"); err != nil { + checkErr.CheckErr(err) + } +} diff --git a/utils/log.go b/utils/log/log.go similarity index 94% rename from utils/log.go rename to utils/log/log.go index 6ab790a..2f1dac4 100644 --- a/utils/log.go +++ b/utils/log/log.go @@ -1,9 +1,10 @@ package utils import ( - config "../models/config" "fmt" "os" + + config "../../models/config" ) // Log - Write to std out diff --git a/utils/readFileAsString.go b/utils/readFileAsString.go index 45bcda3..8b7250e 100644 --- a/utils/readFileAsString.go +++ b/utils/readFileAsString.go @@ -2,11 +2,13 @@ package utils import ( "io/ioutil" + + checkErr "./checkErr" ) // ReadFileAsString - Returns contents of given file func ReadFileAsString(fileName string) string { content, err := ioutil.ReadFile(fileName) - CheckErr(err) + checkErr.CheckErr(err) return string(content) } diff --git a/workflows/downloadBatchFile.go b/workflows/downloadBatchFile.go index d44084b..e0fdfe5 100644 --- a/workflows/downloadBatchFile.go +++ b/workflows/downloadBatchFile.go @@ -3,6 +3,7 @@ package workflows import ( res "../resources" utils "../utils" + log "../utils/log" ) // CanUseDownloadBatchFile - Check's if DownloadBatchFile can be used @@ -13,7 +14,7 @@ func CanUseDownloadBatchFile(batchFilePath string) bool { // DownloadBatchFile - Download items from batch file func DownloadBatchFile(batchFilePath string) { if !utils.CheckIfExists(batchFilePath) { - utils.LogFatal(res.ErrorPathNotFound, batchFilePath) + log.LogFatal(res.ErrorPathNotFound, batchFilePath) } utils.ReadFileLineByLine(batchFilePath, downloadItem) diff --git a/workflows/downloadHashtag.go b/workflows/downloadHashtag.go index 3c11010..d07e76a 100644 --- a/workflows/downloadHashtag.go +++ b/workflows/downloadHashtag.go @@ -1,12 +1,14 @@ package workflows import ( + "fmt" + "strings" + client "../client" config "../models/config" res "../resources" utils "../utils" - "fmt" - "strings" + log "../utils/log" ) // CanUseDownloadHashtag - Test's if this workflow can be used for parameter @@ -19,10 +21,13 @@ func CanUseDownloadHashtag(url string) bool { func DownloadHashtag(url string) { uploads, err := client.GetHashtagUploads(url) if err != nil { - utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) + log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) return } + + uploads = utils.RemoveArchivedItems(uploads) uploadCount := len(uploads) + hashtag := utils.GetHashtagFromURL(url) downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, hashtag) @@ -30,15 +35,16 @@ func DownloadHashtag(url string) { for index, upload := range uploads { downloadVideo(upload, downloadDir) - utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount) + log.Logf("\r[%d/%d] Downloaded", index+1, uploadCount) } - utils.Log() + log.Log() } -func GetHashtagJson(url string) { - uploads, err := client.GetHashtagUploads(url) +// GetHashtagJSON - Prints scraped info from hashtag +func GetHashtagJSON(url string) { + uploads, err := client.GetHashtagUploadsJSON(url) if err != nil { - utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) + log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) return } fmt.Printf("%s", uploads) diff --git a/workflows/downloadMusic.go b/workflows/downloadMusic.go index 2b38e77..8ce7ad5 100644 --- a/workflows/downloadMusic.go +++ b/workflows/downloadMusic.go @@ -1,12 +1,14 @@ package workflows import ( + "fmt" + "regexp" + client "../client" config "../models/config" res "../resources" utils "../utils" - "fmt" - "regexp" + log "../utils/log" ) // CanUseDownloadMusic - Check's if DownloadMusic can be used for parameter @@ -19,9 +21,11 @@ func CanUseDownloadMusic(url string) bool { func DownloadMusic(url string) { uploads, err := client.GetMusicUploads(url) if err != nil { - utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) + log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) return } + + uploads = utils.RemoveArchivedItems(uploads) uploadCount := len(uploads) for index, upload := range uploads { @@ -30,15 +34,16 @@ func DownloadMusic(url string) { utils.InitOutputDirectory(downloadDir) downloadVideo(upload, downloadDir) - utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount) + log.Logf("\r[%d/%d] Downloaded", index+1, uploadCount) } - utils.Log() + log.Log() } -func GetMusicJson(url string) { - uploads, err := client.GetMusicUploadsJson(url) +// GetMusicJSON - Prints scraped info from music +func GetMusicJSON(url string) { + uploads, err := client.GetMusicUploadsJSON(url) if err != nil { - utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) + log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) return } fmt.Printf("%s", uploads) diff --git a/workflows/downloadUser.go b/workflows/downloadUser.go index 592fbc4..3b767ca 100644 --- a/workflows/downloadUser.go +++ b/workflows/downloadUser.go @@ -1,13 +1,15 @@ package workflows import ( + "fmt" + "regexp" + "strings" + client "../client" config "../models/config" res "../resources" utils "../utils" - "fmt" - "regexp" - "strings" + log "../utils/log" ) // CanUseDownloadUser - Test's if this workflow can be used for parameter @@ -21,25 +23,29 @@ func CanUseDownloadUser(url string) bool { func DownloadUser(username string) { uploads, err := client.GetUserUploads(username) if err != nil { - utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) + log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) return } + + uploads = utils.RemoveArchivedItems(uploads) uploadCount := len(uploads) + downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username) utils.InitOutputDirectory(downloadDir) for index, upload := range uploads { downloadVideo(upload, downloadDir) - utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount) + log.Logf("\r[%d/%d] Downloaded", index+1, uploadCount) } - utils.Log() + log.Log() } -func GetUserVideosJson(username string) { - uploads, err := client.GetUserUploadsJson(username) +// GetUserVideosJSON - Prints scraped info from user +func GetUserVideosJSON(username string) { + uploads, err := client.GetUserUploadsJSON(username) if err != nil { - utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) + log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) return } fmt.Printf("%s", uploads) diff --git a/workflows/downloadVideo.go b/workflows/downloadVideo.go index 24fd9c1..7f4adfb 100644 --- a/workflows/downloadVideo.go +++ b/workflows/downloadVideo.go @@ -1,13 +1,15 @@ package workflows import ( + "fmt" + "regexp" + client "../client" models "../models" config "../models/config" res "../resources" utils "../utils" - "fmt" - "regexp" + log "../utils/log" ) // CanUseDownloadSingleVideo - Check's if DownloadSingleVideo can be used for parameter @@ -21,14 +23,18 @@ func DownloadSingleVideo(url string) { username := utils.GetUsernameFromString(url) upload, err := client.GetVideoDetails(url) if err != nil { - utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) + log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) + return + } + + if utils.IsItemInArchive(upload) { return } downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username) utils.InitOutputDirectory(downloadDir) downloadVideo(upload, downloadDir) - utils.Log("[1/1] Downloaded\n") + log.Log("[1/1] Downloaded\n") } // DownloadVideo - Downloads one video @@ -46,4 +52,6 @@ func downloadVideo(upload models.Upload, downloadDir string) { metadataPath := fmt.Sprintf("%s/%s.json", downloadDir, uploadID) upload.WriteToFile(metadataPath) } + + utils.AddItemToArchive(upload.GetUploadID()) } diff --git a/workflows/startWorkflowByParameter.go b/workflows/startWorkflowByParameter.go index 6e5a6f8..2d76055 100644 --- a/workflows/startWorkflowByParameter.go +++ b/workflows/startWorkflowByParameter.go @@ -4,6 +4,7 @@ import ( config "../models/config" res "../resources" utils "../utils" + log "../utils/log" ) // StartWorkflowByParameter - Start needed workflow by given parameter @@ -12,7 +13,7 @@ func StartWorkflowByParameter(url string) { // Music if CanUseDownloadMusic(url) { if config.Config.JSONOnly { - GetMusicJson(url) + GetMusicJSON(url) } else { DownloadMusic(url) } @@ -28,7 +29,7 @@ func StartWorkflowByParameter(url string) { // Tiktok user if CanUseDownloadUser(url) { if config.Config.JSONOnly { - GetUserVideosJson(utils.GetUsernameFromString(url)) + GetUserVideosJSON(utils.GetUsernameFromString(url)) } else { DownloadUser(utils.GetUsernameFromString(url)) } @@ -39,12 +40,12 @@ func StartWorkflowByParameter(url string) { // Tiktok hashtag if CanUseDownloadHashtag(url) { if config.Config.JSONOnly { - GetHashtagJson(url) + GetHashtagJSON(url) } else { DownloadHashtag(url) } return } - utils.LogFatal(res.ErrorCouldNotRecogniseURL, url) + log.LogFatal(res.ErrorCouldNotRecogniseURL, url) }