diff --git a/README.md b/README.md index 3b66933..4c0b208 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ Clone this repository and run `go build` to build the executable. * `-batch-file` - File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored. * `-deadline` - Sets the timout for scraper logic in seconds (used as a workaround for context deadline exceeded error) (default 1500) * `-quiet` - Supress output +* `-json` - Prints all data scraped from TikTok as JSON instead of downloading the videos +* `-limit` - Sets the maximum number of videos to download (default 0, meaning no limit) ## Acknowledgments This software uses the **chromedp** for web scraping, it can be found here: https://github.com/chromedp/chromedp \ diff --git a/client/executeClientAction.go b/client/executeClientAction.go index 14ca590..cb7bddc 100644 --- a/client/executeClientAction.go +++ b/client/executeClientAction.go @@ -84,7 +84,7 @@ func runScrapeWithInfo(ctx context.Context, jsAction string, url string) (string } if jsOutput != "0" { - utils.Logf("\rPreloading... Currently loaded %s items.", jsOutput) + utils.Logf("\rPreloading... 
%s items have been found.", jsOutput) } else { utils.Logf("\rPreloading...") } diff --git a/client/getHashtagUploads.go b/client/getHashtagUploads.go new file mode 100644 index 0000000..9c01981 --- /dev/null +++ b/client/getHashtagUploads.go @@ -0,0 +1,26 @@ +package client + +import ( + models "../models" + config "../models/config" + "fmt" +) + +// GetHashtagUploads - Get all uploads marked with given hashtag +func GetHashtagUploads(hashtagURL string) ([]models.Upload, error) { + jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit) + actionOutput, err := executeClientAction(hashtagURL, jsMethod) + if err != nil { + return nil, err + } + return models.ParseUploads(actionOutput), nil +} + +func GetHashtagUploadsJson(hashtagURL string) (string, error) { + jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit) + actionOutput, err := executeClientAction(hashtagURL, jsMethod) + if err != nil { + return "", err + } + return actionOutput, nil +} diff --git a/client/getMusicUploads.go b/client/getMusicUploads.go index a7fa108..320b33f 100644 --- a/client/getMusicUploads.go +++ b/client/getMusicUploads.go @@ -2,13 +2,25 @@ package client import ( models "../models" + config "../models/config" + "fmt" ) // GetMusicUploads - Get all uploads by given music func GetMusicUploads(url string) ([]models.Upload, error) { - actionOutput, err := executeClientAction(url, "bootstrapIteratingVideos()") + jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit) + actionOutput, err := executeClientAction(url, jsMethod) if err != nil { return nil, err } return models.ParseUploads(actionOutput), nil } + +func GetMusicUploadsJson(url string) (string, error) { + jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit) + actionOutput, err := executeClientAction(url, jsMethod) + if err != nil { + return "", err + } + return actionOutput, nil +} diff --git a/client/getUserUploads.go b/client/getUserUploads.go 
index 67c602d..3752492 100644 --- a/client/getUserUploads.go +++ b/client/getUserUploads.go @@ -2,13 +2,25 @@ package client import ( models "../models" + config "../models/config" + "fmt" ) // GetUserUploads - Get all uploads by user func GetUserUploads(username string) ([]models.Upload, error) { - actionOutput, err := executeClientAction(`https://www.tiktok.com/@`+username, "bootstrapIteratingVideos()") + jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit) + actionOutput, err := executeClientAction(`https://www.tiktok.com/@`+username, jsMethod) if err != nil { return nil, err } return models.ParseUploads(actionOutput), nil } + +func GetUserUploadsJson(username string) (string, error) { + jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit) + actionOutput, err := executeClientAction(`https://www.tiktok.com/@`+username, jsMethod) + if err != nil { + return "", err + } + return actionOutput, nil +} diff --git a/models/config/config.go b/models/config/config.go index 773c9ca..3eb4ea1 100644 --- a/models/config/config.go +++ b/models/config/config.go @@ -15,6 +15,8 @@ var Config struct { MetaData bool Quiet bool Deadline int + Limit int + JSONOnly bool } // GetConfig - Returns Config object @@ -25,6 +27,8 @@ func GetConfig() { metadata := flag.Bool("metadata", false, "Write video metadata to a .json file") quiet := flag.Bool("quiet", false, "Supress output") deadline := flag.Int("deadline", 1500, "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)") + limit := flag.Int("limit", 0, "Sets the videos count limit (useful when there too many videos from the user or by hashtag)") + jsonOnly := flag.Bool("json", false, "Just get JSON data from scraper (without video downloading)") flag.Parse() args := flag.Args() @@ -44,5 +48,10 @@ func GetConfig() { Config.Debug = *debug Config.MetaData = *metadata Config.Quiet = *quiet + if *jsonOnly { + Config.Quiet = true + } 
Config.Deadline = *deadline + Config.Limit = *limit + Config.JSONOnly = *jsonOnly } diff --git a/scraper.js b/scraper.js index 5d2dd16..fc374e6 100644 --- a/scraper.js +++ b/scraper.js @@ -1,7 +1,7 @@ optStrings = { selectors: { feedLoading: 'div.tiktok-loading.feed-loading', - modalArrowLeft: 'div.video-card-modal > div > img.arrow-right', + modalArrowRight: 'div.video-card-modal > div > img.arrow-right', modalClose: '.video-card-modal > div > div.close', modalPlayer: 'div > div > main > div.video-card-modal > div > div.video-card-big > div.video-card-container > div > div > video', modalShareInput: '.copy-link-container > input', @@ -36,6 +36,7 @@ optStrings = { currentState = { preloadCount: 0, finished: false, + limit: 0 }; checkForErrors = function() { @@ -65,8 +66,14 @@ buldVidUrlArray = function(finishCallback) { var videoArray = []; var intervalID = window.setInterval(x => { videoArray.push(getCurrentModalVideo()); - - var arrowRight = document.querySelectorAll(optStrings.selectors.modalArrowLeft)[0]; + if(currentState.limit > 0) { + if (videoArray.length >= currentState.limit) { + window.clearInterval(intervalID); + document.querySelector(optStrings.selectors.modalClose).click(); + finishCallback(videoArray); return; + } + } + var arrowRight = document.querySelectorAll(optStrings.selectors.modalArrowRight)[0]; if (arrowRight.classList.contains(optStrings.classes.modalCloseDisabled)) { window.clearInterval(intervalID); document.querySelector(optStrings.selectors.modalClose).click(); @@ -127,6 +134,12 @@ scrollWhileNew = function(finishCallback) { var intervalID = window.setInterval(x => { var oldCount = state.count; state.count = document.getElementsByClassName(optStrings.classes.feedVideoItem).length; + if(currentState.limit > 0) { + if (currentState.preloadCount >= currentState.limit || state.count >= currentState.limit) { + finishCallback(createVidUrlElement); + window.clearInterval(intervalID); return; + } + } if(checkForErrors()) { window.clearInterval(intervalID); 
return; @@ -145,7 +158,8 @@ scrollWhileNew = function(finishCallback) { }, 1000); }; -bootstrapIteratingVideos = function() { +bootstrapIteratingVideos = function(limit) { + currentState.limit = limit; scrollWhileNew(buldVidUrlArray); return 'bootstrapIteratingVideos'; }; diff --git a/utils/getHashtag.go b/utils/getHashtag.go new file mode 100644 index 0000000..11c7092 --- /dev/null +++ b/utils/getHashtag.go @@ -0,0 +1,16 @@ +package utils + +import ( + res "../resources" + "fmt" + "strings" +) + +// GetHashtagFromURL - Gets tag name from passed url +func GetHashtagFromURL(str string) string { + if match := strings.Contains(str, "/tag/"); match { + return strings.Split(str, "/tag/")[1] + } + + panic(fmt.Sprintf(res.ErrorCouldNotRecogniseURL, str)) +} diff --git a/workflows/downloadHashtag.go b/workflows/downloadHashtag.go new file mode 100644 index 0000000..3c11010 --- /dev/null +++ b/workflows/downloadHashtag.go @@ -0,0 +1,45 @@ +package workflows + +import ( + client "../client" + config "../models/config" + res "../resources" + utils "../utils" + "fmt" + "strings" +) + +// CanUseDownloadHashtag - Tests if this workflow can be used for parameter +func CanUseDownloadHashtag(url string) bool { + match := strings.Contains(url, "/tag/") + return match +} + +// DownloadHashtag - Download videos marked with given hashtag +func DownloadHashtag(url string) { + uploads, err := client.GetHashtagUploads(url) + if err != nil { + utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) + return + } + uploadCount := len(uploads) + hashtag := utils.GetHashtagFromURL(url) + downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, hashtag) + + utils.InitOutputDirectory(downloadDir) + + for index, upload := range uploads { + downloadVideo(upload, downloadDir) + utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount) + } + utils.Log() +} + +func GetHashtagJson(url string) { + uploads, err := client.GetHashtagUploadsJson(url) + if err != nil { + 
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) + return + } + fmt.Printf("%s", uploads) +} diff --git a/workflows/downloadMusic.go b/workflows/downloadMusic.go index 92a5e3a..2b38e77 100644 --- a/workflows/downloadMusic.go +++ b/workflows/downloadMusic.go @@ -34,3 +34,12 @@ func DownloadMusic(url string) { } utils.Log() } + +func GetMusicJson(url string) { + uploads, err := client.GetMusicUploadsJson(url) + if err != nil { + utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) + return + } + fmt.Printf("%s", uploads) +} diff --git a/workflows/downloadUser.go b/workflows/downloadUser.go index 5e77bd3..592fbc4 100644 --- a/workflows/downloadUser.go +++ b/workflows/downloadUser.go @@ -35,3 +35,12 @@ func DownloadUser(username string) { } utils.Log() } + +func GetUserVideosJson(username string) { + uploads, err := client.GetUserUploadsJson(username) + if err != nil { + utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error()) + return + } + fmt.Printf("%s", uploads) +} diff --git a/workflows/startWorkflowByParameter.go b/workflows/startWorkflowByParameter.go index f3af3df..6e5a6f8 100644 --- a/workflows/startWorkflowByParameter.go +++ b/workflows/startWorkflowByParameter.go @@ -1,6 +1,7 @@ package workflows import ( + config "../models/config" res "../resources" utils "../utils" ) @@ -10,7 +11,11 @@ func StartWorkflowByParameter(url string) { // Music if CanUseDownloadMusic(url) { - DownloadMusic(url) + if config.Config.JSONOnly { + GetMusicJson(url) + } else { + DownloadMusic(url) + } return } @@ -22,7 +27,22 @@ func StartWorkflowByParameter(url string) { // Tiktok user if CanUseDownloadUser(url) { - DownloadUser(utils.GetUsernameFromString(url)) + if config.Config.JSONOnly { + GetUserVideosJson(utils.GetUsernameFromString(url)) + } else { + DownloadUser(utils.GetUsernameFromString(url)) + } + + return + } + + // Tiktok hashtag + if CanUseDownloadHashtag(url) { + if config.Config.JSONOnly { + GetHashtagJson(url) + } else { + DownloadHashtag(url) + } 
return }