From e2e4ba0d4b3ad54b24ac64e8242eee5727328c54 Mon Sep 17 00:00:00 2001 From: Pijus Kamandulis Date: Sun, 16 Aug 2020 18:56:30 +0300 Subject: [PATCH] Added ability to download from scrapeData (json) --- generator/resources.go | 7 +- main.go | 7 ++ models/config/config.go | 27 +++--- resources/flags.go | 161 ++++++++++++++++--------------- resources/messages.go | 2 +- resources/scraper.go | 6 +- workflows/downloadScrapedData.go | 39 ++++++++ 7 files changed, 156 insertions(+), 93 deletions(-) create mode 100644 workflows/downloadScrapedData.go diff --git a/generator/resources.go b/generator/resources.go index 0bb38ee..c3e29a9 100644 --- a/generator/resources.go +++ b/generator/resources.go @@ -34,7 +34,8 @@ var ( "ItemsFoundInArchive": "%d items, found in archive. Skipping...\n", "Downloaded": "\r[%d/%d] Downloaded", "UsageLine": "Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL\n" + - " or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt", + " or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt\n" + + " or: tiktok-dl [OPTIONS] -scraped-data path/to/data.json", }, }, resource{ @@ -49,6 +50,10 @@ var ( "BatchFlag": "batch-file", "BatchDefault": "", "BatchDescription": "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.", + // ScrapedData + "ScrapedDataFlag": "scraped-data", + "ScrapedDataDefault": "", + "ScrapedDataDescription": "Download videos from scrape file (json format)", // Archive "ArchiveFlag": "archive", "ArchiveDefault": "", diff --git a/main.go b/main.go index e1b4008..5b137f9 100644 --- a/main.go +++ b/main.go @@ -9,6 +9,7 @@ func main() { config.GetConfig() url := config.Config.URL batchFilePath := config.Config.BatchFilePath + scrapedDataFilePath := config.Config.ScrapedDataFilePath // Batch file if workflows.CanUseDownloadBatchFile(batchFilePath) { @@ -16,5 +17,11 @@ func main() { return } + // Scraped data file + if workflows.CanUseDownloadScrapedData(scrapedDataFilePath) { + workflows.DownloadScrapedData(scrapedDataFilePath) + return + } + workflows.StartWorkflowByParameter(url) } diff --git a/models/config/config.go b/models/config/config.go index 37b3ab0..f9632df 100644 --- a/models/config/config.go +++ b/models/config/config.go @@ -11,23 +11,25 @@ import ( // Config - Runtime configuration var Config struct { - URL string - OutputPath string - BatchFilePath string - ArchiveFilePath string - FailLogFilePath string - Debug bool - MetaData bool - Quiet bool - JSONOnly bool - Deadline int - Limit int + URL string + OutputPath string + BatchFilePath string + ScrapedDataFilePath string + ArchiveFilePath string + FailLogFilePath string + Debug bool + MetaData bool + Quiet bool + JSONOnly bool + Deadline int + Limit int } // GetConfig - Returns Config object func GetConfig() { outputPath := flag.String(res.OutputFlag, res.OutputDefault, res.OutputDescription) batchFilePath := flag.String(res.BatchFlag, res.BatchDefault, res.BatchDescription) + scrapedDataFilePath := flag.String(res.ScrapedDataFlag, res.ScrapedDataDefault, res.ScrapedDataDescription) archive := flag.String(res.ArchiveFlag, res.ArchiveDefault, res.ArchiveDescription) failLogPath := flag.String(res.FailLogFlag, res.FailLogDefault, res.FailLogDescription) debug := flag.Bool(res.DebugFlag, parseBool(res.DebugDefault), res.DebugDescription) @@ -39,7 +41,7 @@ func GetConfig() { flag.Parse() args := flag.Args() - if len(args) < 1 && *batchFilePath == "" { + if len(args) < 1 && *batchFilePath == "" && *scrapedDataFilePath == "" { fmt.Println(res.UsageLine) os.Exit(2) } @@ -51,6 +53,7 @@ func GetConfig() { } Config.OutputPath = *outputPath Config.BatchFilePath = *batchFilePath + Config.ScrapedDataFilePath = *scrapedDataFilePath Config.ArchiveFilePath = *archive Config.FailLogFilePath = *failLogPath Config.Debug = *debug diff --git a/resources/flags.go b/resources/flags.go index 96f43ec..c348006 100644 --- a/resources/flags.go +++ b/resources/flags.go @@ -3,92 +3,101 @@ // Check `/generator/resources.go` to change generated content package resources -//BatchDescription - -var BatchDescription = "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored." - -//LimitDefault - -var LimitDefault = "0" - -//DebugDescription - -var DebugDescription = "Enables debug mode" +//FailLogDefault - +var FailLogDefault = "" //QuietDescription - var QuietDescription = "Suppress output" -//MetadataDefault - -var MetadataDefault = "false" - -//LimitDescription - -var LimitDescription = "Sets the videos count limit (useful when there too many videos from the user or by hashtag)" - -//OutputDescription - -var OutputDescription = "Output path" - -//BatchDefault - -var BatchDefault = "" - -//FailLogDefault - -var FailLogDefault = "" - -//MetadataFlag - -var MetadataFlag = "metadata" - -//ArchiveFlag - -var ArchiveFlag = "archive" - -//MetadataDescription - -var MetadataDescription = "Write video metadata to a .json file" - -//QuietDefault - -var QuietDefault = "false" - -//DeadlineDefault - -var DeadlineDefault = "1500" - -//JsonFlag - -var JsonFlag = "json" - -//JsonDefault - -var JsonDefault = "false" - -//DeadlineDescription - -var DeadlineDescription = "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)" - -//OutputFlag - -var OutputFlag = "output" - -//OutputDefault - -var OutputDefault = "./downloads" - -//FailLogFlag - -var FailLogFlag = "fail-log" - -//DebugDefault - -var DebugDefault = "false" - -//ArchiveDefault - -var ArchiveDefault = "" - -//FailLogDescription - -var FailLogDescription = "Write failed items to log file" - -//BatchFlag - -var BatchFlag = "batch-file" - -//QuietFlag - -var QuietFlag = "quiet" - -//DeadlineFlag - -var DeadlineFlag = "deadline" - -//LimitFlag - -var LimitFlag = "limit" +//ScrapedDataDefault - +var ScrapedDataDefault = "" //ArchiveDescription - var ArchiveDescription = "Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it." +//FailLogFlag - +var FailLogFlag = "fail-log" + +//QuietDefault - +var QuietDefault = "false" + +//JsonFlag - +var JsonFlag = "json" + +//ScrapedDataFlag - +var ScrapedDataFlag = "scraped-data" + //DebugFlag - var DebugFlag = "debug" +//MetadataDefault - +var MetadataDefault = "false" + +//DeadlineFlag - +var DeadlineFlag = "deadline" + +//OutputFlag - +var OutputFlag = "output" + +//BatchFlag - +var BatchFlag = "batch-file" + +//ArchiveFlag - +var ArchiveFlag = "archive" + +//ArchiveDefault - +var ArchiveDefault = "" + +//JsonDefault - +var JsonDefault = "false" + //JsonDescription - var JsonDescription = "Just get JSON data from scraper (without video downloading)" + +//DeadlineDescription - +var DeadlineDescription = "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)" + +//OutputDefault - +var OutputDefault = "./downloads" + +//OutputDescription - +var OutputDescription = "Output path" + +//BatchDescription - +var BatchDescription = "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored." + +//LimitFlag - +var LimitFlag = "limit" + +//LimitDescription - +var LimitDescription = "Sets the videos count limit (useful when there too many videos from the user or by hashtag)" + +//ScrapedDataDescription - +var ScrapedDataDescription = "Download videos from scrape file (json format)" + +//DebugDescription - +var DebugDescription = "Enables debug mode" + +//MetadataFlag - +var MetadataFlag = "metadata" + +//BatchDefault - +var BatchDefault = "" + +//MetadataDescription - +var MetadataDescription = "Write video metadata to a .json file" + +//LimitDefault - +var LimitDefault = "0" + +//DeadlineDefault - +var DeadlineDefault = "1500" + +//FailLogDescription - +var FailLogDescription = "Write failed items to log file" + +//DebugDefault - +var DebugDefault = "false" + +//QuietFlag - +var QuietFlag = "quiet" diff --git a/resources/messages.go b/resources/messages.go index a56907c..747e55c 100644 --- a/resources/messages.go +++ b/resources/messages.go @@ -19,4 +19,4 @@ var ItemsFoundInArchive = "%d items, found in archive. Skipping...\n" var Downloaded = "\r[%d/%d] Downloaded" //UsageLine - -var UsageLine = "Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL\n or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt" +var UsageLine = "Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL\n or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt\n or: tiktok-dl [OPTIONS] -scraped-data path/to/data.json" diff --git a/resources/scraper.go b/resources/scraper.go index 673d7fc..e9b04ad 100644 --- a/resources/scraper.go +++ b/resources/scraper.go @@ -3,8 +3,8 @@ // Check `/generator/resources.go` to change generated content package resources -//ScraperScript - -var ScraperScript = "optStrings={selectors:{feedLoading:\"div.tiktok-loading.feed-loading\",modalArrowRight:\"div.video-card-modal > div > img.arrow-right\",modalClose:\".video-card-modal > div > div.close\",modalPlayer:\"div > div > main > div.video-card-modal > div > div.video-card-big > div.video-card-container > div > div > video\",modalShareInput:\".copy-link-container > input\",modalCaption:\"div.video-card-big > div.content-container > div.video-meta-info > h1\",modalSoundLink:\"div.content-container > div.video-meta-info > h2.music-info > a\",modalUploader:\".user-username\",videoPlayer:\"div.video-card-container > div > div > video\",videoShareInput:\"div.content-container.border > div.copy-link-container > input\",videoCaption:\"div.content-container.border > div.video-meta-info > h1\",videoSoundLink:\"div.content-container.border > div.video-meta-info > h2.music-info > a\",videoUploader:\".user-username\"},classes:{feedVideoItem:\"video-feed-item-wrapper\",modalCloseDisabled:\"disabled\",titleMessage:\"title\"},tags:{resultTag:\"video_urls\",resultParentTag:\"body\"},attributes:{src:\"src\"},tiktokMessages:[\"Couldn't find this account\",\"No videos yet\",\"Video currently unavailable\"]},currentState={preloadCount:0,finished:!1,limit:0},checkForErrors=function(){var e=document.getElementsByClassName(optStrings.classes.titleMessage);if(e&&e.length){var t=Array.from(e).find(e=>optStrings.tiktokMessages.includes(e.textContent)).textContent;if(t)return createVidUrlElement(\"ERR: \"+t),!0}return!1},createVidUrlElement=function(e){var t=document.createElement(optStrings.tags.resultTag);t.innerText=JSON.stringify(e),document.getElementsByTagName(optStrings.tags.resultParentTag)[0].appendChild(t),currentState.finished=!0},buldVidUrlArray=function(e){document.getElementsByClassName(optStrings.classes.feedVideoItem)[0].click();var t=[],r=window.setInterval(o=>{t.push(getCurrentModalVideo()),currentState.limit>0&&t.length>=currentState.limit&&(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t));var i=document.querySelectorAll(optStrings.selectors.modalArrowRight)[0];i.classList.contains(optStrings.classes.modalCloseDisabled)?(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t)):i.click()},20)},getCurrentModalVideo=function(){var e=document.querySelector(optStrings.selectors.modalPlayer).getAttribute(optStrings.attributes.src),t=document.querySelector(optStrings.selectors.modalShareInput).value,r=document.querySelector(optStrings.selectors.modalCaption).textContent,o=document.querySelector(optStrings.selectors.modalSoundLink),i=document.querySelector(optStrings.selectors.modalUploader).textContent,n=o.getAttribute(\"href\");return{url:e,shareLink:t,caption:r,uploader:i,sound:{title:o.text,link:n}}},getCurrentVideo=function(){if(!checkForErrors()){var e=document.querySelector(optStrings.selectors.videoPlayer).getAttribute(optStrings.attributes.src),t=document.querySelector(optStrings.selectors.videoShareInput).value,r=document.querySelector(optStrings.selectors.videoCaption).textContent,o=document.querySelector(optStrings.selectors.videoSoundLink),i=document.querySelector(optStrings.selectors.videoUploader).textContent,n=o.getAttribute(\"href\");return{url:e,shareLink:t,caption:r,uploader:i,sound:{title:o.text,link:n}}}},scrollBottom=()=>window.scrollTo(0,document.body.scrollHeight),scrollWhileNew=function(e){var t={count:0},r=window.setInterval(o=>{scrollBottom();var i=t.count;if(t.count=document.getElementsByClassName(optStrings.classes.feedVideoItem).length,currentState.limit>0&&(currentState.preloadCount>=currentState.limit||t.count>=currentState.limit)&&(e(createVidUrlElement),window.clearInterval(r)),checkForErrors())window.clearInterval(r);else if(0!=t.count)if(i!==t.count)currentState.preloadCount=t.count;else{if(document.querySelector(optStrings.selectors.feedLoading))return;window.clearInterval(r),e(createVidUrlElement)}},1e3)},bootstrapIteratingVideos=function(e){return currentState.limit=e,scrollWhileNew(buldVidUrlArray),\"bootstrapIteratingVideos\"},bootstrapGetCurrentVideo=function(){var e=getCurrentVideo();return createVidUrlElement(e),\"bootstrapGetCurrentVideo\"},init=()=>{const e=navigator.__proto__;return delete e.webdriver,navigator.__proto__=e,\"script initialized\"},init();" - //ScraperPath - var ScraperPath = "scraper.js" + +//ScraperScript - +var ScraperScript = "optStrings={selectors:{feedLoading:\".tiktok-ui-loading-container\",modalArrowRight:\"div > div.video-card-container > img.arrow-right\",modalClose:\"div > div.video-card-container > img.control-icon.close\",modalPlayer:\"div.video-card-container > div.video-card-browse > video\",modalCaption:\"div.content-container > div.video-infos-container > h1\",modalSoundLink:\"div.content-container > div.video-infos-container > h2.music-info > a\",modalUploader:\".user-username\",videoPlayer:\"div.video-card-container > div > video\",videoCaption:\"div.content-container > div.video-infos-container > h1\",videoSoundLink:\"div.content-container > div.video-infos-container > h2 > a\",videoUploader:\".user-username\"},classes:{feedVideoItem:\"video-feed-item-wrapper\",modalCloseDisabled:\"disabled\",titleMessage:\"title\"},tags:{resultTag:\"video_urls\",resultParentTag:\"body\"},attributes:{src:\"src\"},tiktokMessages:[\"Couldn't find this account\",\"No videos yet\",\"Video currently unavailable\"]},currentState={preloadCount:0,finished:!1,limit:0},checkForErrors=function(){var e=document.getElementsByClassName(optStrings.classes.titleMessage);if(e&&e.length){var t=Array.from(e).find(e=>optStrings.tiktokMessages.includes(e.textContent)).textContent;if(t)return createVidUrlElement(\"ERR: \"+t),!0}return!1},createVidUrlElement=function(e){var t=document.createElement(optStrings.tags.resultTag);t.innerText=JSON.stringify(e),document.getElementsByTagName(optStrings.tags.resultParentTag)[0].appendChild(t),currentState.finished=!0},buldVidUrlArray=function(e){document.getElementsByClassName(optStrings.classes.feedVideoItem)[0].click();var t=[],r=window.setInterval(o=>{t.push(getCurrentModalVideo()),currentState.limit>0&&t.length>=currentState.limit&&(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t));var n=document.querySelectorAll(optStrings.selectors.modalArrowRight)[0];!n||n.classList.contains(optStrings.classes.modalCloseDisabled)?(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t)):n.click()},20)},getCurrentModalVideo=function(){var e=document.querySelector(optStrings.selectors.modalPlayer).getAttribute(optStrings.attributes.src),t=window.location.href,r=document.querySelector(optStrings.selectors.modalCaption).textContent,o=document.querySelector(optStrings.selectors.modalSoundLink),n=document.querySelector(optStrings.selectors.modalUploader).textContent,i=o?o.getAttribute(\"href\"):\"\";return{url:e,shareLink:t,caption:r,uploader:n,sound:{title:o?o.text:\"\",link:i}}},getCurrentVideo=function(){if(!checkForErrors()){var e=document.querySelector(optStrings.selectors.videoPlayer).getAttribute(optStrings.attributes.src),t=window.location.href,r=document.querySelector(optStrings.selectors.videoCaption).textContent,o=document.querySelector(optStrings.selectors.videoSoundLink),n=document.querySelector(optStrings.selectors.videoUploader).textContent,i=o?o.getAttribute(\"href\"):\"\";return{url:e,shareLink:t,caption:r,uploader:n,sound:{title:o?o.text:\"\",link:i}}}},scrollBottom=()=>window.scrollTo(0,document.body.scrollHeight),scrollWhileNew=function(e){var t={count:0},r=window.setInterval(o=>{scrollBottom();var n=t.count;if(t.count=document.getElementsByClassName(optStrings.classes.feedVideoItem).length,currentState.limit>0&&(currentState.preloadCount>=currentState.limit||t.count>=currentState.limit)&&(e(createVidUrlElement),window.clearInterval(r)),checkForErrors())window.clearInterval(r);else if(0!=t.count)if(n!==t.count)currentState.preloadCount=t.count;else{if(isLoading())return;window.clearInterval(r),e(createVidUrlElement)}},1e3)},isLoading=function(){var e=document.querySelector(optStrings.selectors.feedLoading);return e&&0!=e.getClientRects().length},bootstrapIteratingVideos=function(e){return currentState.limit=e,scrollWhileNew(buldVidUrlArray),\"bootstrapIteratingVideos\"},bootstrapGetCurrentVideo=function(){var e=getCurrentVideo();return createVidUrlElement(e),\"bootstrapGetCurrentVideo\"},init=()=>{const e=navigator.__proto__;return delete e.webdriver,navigator.__proto__=e,\"script initialized\"},init();" diff --git a/workflows/downloadScrapedData.go b/workflows/downloadScrapedData.go new file mode 100644 index 0000000..2bbd112 --- /dev/null +++ b/workflows/downloadScrapedData.go @@ -0,0 +1,39 @@ +package workflows + +import ( + "fmt" + models "github.com/pikami/tiktok-dl/models" + config "github.com/pikami/tiktok-dl/models/config" + res "github.com/pikami/tiktok-dl/resources" + utils "github.com/pikami/tiktok-dl/utils" + fileio "github.com/pikami/tiktok-dl/utils/fileio" + log "github.com/pikami/tiktok-dl/utils/log" +) + +// CanUseDownloadScrapedData - Check's if DownloadScrapedData can be used +func CanUseDownloadScrapedData(scrapedDataFilePath string) bool { + return scrapedDataFilePath != "" +} + +// DownloadScrapedData - Download items from scraped data file +func DownloadScrapedData(scrapedDataFilePath string) { + if !fileio.CheckIfExists(scrapedDataFilePath) { + log.LogFatal(res.ErrorPathNotFound, scrapedDataFilePath) + } + + dataFileContent := fileio.ReadFileToString(scrapedDataFilePath) + uploads := models.ParseUploads(dataFileContent) + uploads = utils.RemoveArchivedItems(uploads) + + uploadCount := len(uploads) + + for index, upload := range uploads { + username := utils.GetUsernameFromString(upload.Uploader) + downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username) + + fileio.InitOutputDirectory(downloadDir) + downloadVideo(upload, downloadDir) + log.Logf(res.Downloaded, index+1, uploadCount) + } + log.Log() +}