Added ability to download from scrapeData (json)

2026-06-30 18:17:45 +01:00 · 2020-08-16 18:56:30 +03:00
parent 7a25f521cb
commit e2e4ba0d4b
7 changed files with 156 additions and 93 deletions
@@ -34,7 +34,8 @@ var (
 				"ItemsFoundInArchive":  "%d items, found in archive. Skipping...\n",
 				"Downloaded":           "\r[%d/%d] Downloaded",
 				"UsageLine": "Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL\n" +
-					"  or:  tiktok-dl [OPTIONS] -batch-file path/to/users.txt",
+					"  or:  tiktok-dl [OPTIONS] -batch-file path/to/users.txt\n" +
+					"  or:  tiktok-dl [OPTIONS] -scraped-data path/to/data.json",
 			},
 		},
 		resource{
@@ -49,6 +50,10 @@ var (
 				"BatchFlag":        "batch-file",
 				"BatchDefault":     "",
 				"BatchDescription": "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.",
+				// ScrapedData
+				"ScrapedDataFlag":        "scraped-data",
+				"ScrapedDataDefault":     "",
+				"ScrapedDataDescription": "Download videos from scrape file (json format)",
 				// Archive
 				"ArchiveFlag":        "archive",
 				"ArchiveDefault":     "",
@@ -9,6 +9,7 @@ func main() {
 	config.GetConfig()
 	url := config.Config.URL
 	batchFilePath := config.Config.BatchFilePath
+	scrapedDataFilePath := config.Config.ScrapedDataFilePath

 	// Batch file
 	if workflows.CanUseDownloadBatchFile(batchFilePath) {
@@ -16,5 +17,11 @@ func main() {
 		return
 	}

+	// Scraped data file
+	if workflows.CanUseDownloadScrapedData(scrapedDataFilePath) {
+		workflows.DownloadScrapedData(scrapedDataFilePath)
+		return
+	}
+
 	workflows.StartWorkflowByParameter(url)
 }
@@ -14,6 +14,7 @@ var Config struct {
 	URL                 string
 	OutputPath          string
 	BatchFilePath       string
+	ScrapedDataFilePath string
 	ArchiveFilePath     string
 	FailLogFilePath     string
 	Debug               bool
@@ -28,6 +29,7 @@ var Config struct {
 func GetConfig() {
 	outputPath := flag.String(res.OutputFlag, res.OutputDefault, res.OutputDescription)
 	batchFilePath := flag.String(res.BatchFlag, res.BatchDefault, res.BatchDescription)
+	scrapedDataFilePath := flag.String(res.ScrapedDataFlag, res.ScrapedDataDefault, res.ScrapedDataDescription)
 	archive := flag.String(res.ArchiveFlag, res.ArchiveDefault, res.ArchiveDescription)
 	failLogPath := flag.String(res.FailLogFlag, res.FailLogDefault, res.FailLogDescription)
 	debug := flag.Bool(res.DebugFlag, parseBool(res.DebugDefault), res.DebugDescription)
@@ -39,7 +41,7 @@ func GetConfig() {
 	flag.Parse()

 	args := flag.Args()
-	if len(args) < 1 && *batchFilePath == "" {
+	if len(args) < 1 && *batchFilePath == "" && *scrapedDataFilePath == "" {
 		fmt.Println(res.UsageLine)
 		os.Exit(2)
 	}
@@ -51,6 +53,7 @@ func GetConfig() {
 	}
 	Config.OutputPath = *outputPath
 	Config.BatchFilePath = *batchFilePath
+	Config.ScrapedDataFilePath = *scrapedDataFilePath
 	Config.ArchiveFilePath = *archive
 	Config.FailLogFilePath = *failLogPath
 	Config.Debug = *debug
@@ -3,92 +3,101 @@
 // Check `/generator/resources.go` to change generated content
 package resources

-//BatchDescription -
-var BatchDescription = "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored."
-
-//LimitDefault -
-var LimitDefault = "0"
-
-//DebugDescription -
-var DebugDescription = "Enables debug mode"
+//FailLogDefault -
+var FailLogDefault = ""

 //QuietDescription -
 var QuietDescription = "Suppress output"

-//MetadataDefault -
-var MetadataDefault = "false"
-
-//LimitDescription -
-var LimitDescription = "Sets the videos count limit (useful when there too many videos from the user or by hashtag)"
-
-//OutputDescription -
-var OutputDescription = "Output path"
-
-//BatchDefault -
-var BatchDefault = ""
-
-//FailLogDefault -
-var FailLogDefault = ""
-
-//MetadataFlag -
-var MetadataFlag = "metadata"
-
-//ArchiveFlag -
-var ArchiveFlag = "archive"
-
-//MetadataDescription -
-var MetadataDescription = "Write video metadata to a .json file"
-
-//QuietDefault -
-var QuietDefault = "false"
-
-//DeadlineDefault -
-var DeadlineDefault = "1500"
-
-//JsonFlag -
-var JsonFlag = "json"
-
-//JsonDefault -
-var JsonDefault = "false"
-
-//DeadlineDescription -
-var DeadlineDescription = "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)"
-
-//OutputFlag -
-var OutputFlag = "output"
-
-//OutputDefault -
-var OutputDefault = "./downloads"
-
-//FailLogFlag -
-var FailLogFlag = "fail-log"
-
-//DebugDefault -
-var DebugDefault = "false"
-
-//ArchiveDefault -
-var ArchiveDefault = ""
-
-//FailLogDescription -
-var FailLogDescription = "Write failed items to log file"
-
-//BatchFlag -
-var BatchFlag = "batch-file"
-
-//QuietFlag -
-var QuietFlag = "quiet"
-
-//DeadlineFlag -
-var DeadlineFlag = "deadline"
-
-//LimitFlag -
-var LimitFlag = "limit"
+//ScrapedDataDefault -
+var ScrapedDataDefault = ""

 //ArchiveDescription -
 var ArchiveDescription = "Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it."

+//FailLogFlag -
+var FailLogFlag = "fail-log"
+
+//QuietDefault -
+var QuietDefault = "false"
+
+//JsonFlag -
+var JsonFlag = "json"
+
+//ScrapedDataFlag -
+var ScrapedDataFlag = "scraped-data"
+
 //DebugFlag -
 var DebugFlag = "debug"

+//MetadataDefault -
+var MetadataDefault = "false"
+
+//DeadlineFlag -
+var DeadlineFlag = "deadline"
+
+//OutputFlag -
+var OutputFlag = "output"
+
+//BatchFlag -
+var BatchFlag = "batch-file"
+
+//ArchiveFlag -
+var ArchiveFlag = "archive"
+
+//ArchiveDefault -
+var ArchiveDefault = ""
+
+//JsonDefault -
+var JsonDefault = "false"
+
 //JsonDescription -
 var JsonDescription = "Just get JSON data from scraper (without video downloading)"
+
+//DeadlineDescription -
+var DeadlineDescription = "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)"
+
+//OutputDefault -
+var OutputDefault = "./downloads"
+
+//OutputDescription -
+var OutputDescription = "Output path"
+
+//BatchDescription -
+var BatchDescription = "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored."
+
+//LimitFlag -
+var LimitFlag = "limit"
+
+//LimitDescription -
+var LimitDescription = "Sets the videos count limit (useful when there too many videos from the user or by hashtag)"
+
+//ScrapedDataDescription -
+var ScrapedDataDescription = "Download videos from scrape file (json format)"
+
+//DebugDescription -
+var DebugDescription = "Enables debug mode"
+
+//MetadataFlag -
+var MetadataFlag = "metadata"
+
+//BatchDefault -
+var BatchDefault = ""
+
+//MetadataDescription -
+var MetadataDescription = "Write video metadata to a .json file"
+
+//LimitDefault -
+var LimitDefault = "0"
+
+//DeadlineDefault -
+var DeadlineDefault = "1500"
+
+//FailLogDescription -
+var FailLogDescription = "Write failed items to log file"
+
+//DebugDefault -
+var DebugDefault = "false"
+
+//QuietFlag -
+var QuietFlag = "quiet"
@@ -19,4 +19,4 @@ var ItemsFoundInArchive = "%d items, found in archive. Skipping...\n"
 var Downloaded = "\r[%d/%d] Downloaded"

 //UsageLine -
-var UsageLine = "Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL\n  or:  tiktok-dl [OPTIONS] -batch-file path/to/users.txt"
+var UsageLine = "Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL\n  or:  tiktok-dl [OPTIONS] -batch-file path/to/users.txt\n  or:  tiktok-dl [OPTIONS] -scraped-data path/to/data.json"
@@ -3,8 +3,8 @@
 // Check `/generator/resources.go` to change generated content
 package resources

-//ScraperScript -
-var ScraperScript = "optStrings={selectors:{feedLoading:\"div.tiktok-loading.feed-loading\",modalArrowRight:\"div.video-card-modal > div > img.arrow-right\",modalClose:\".video-card-modal > div > div.close\",modalPlayer:\"div > div > main > div.video-card-modal > div > div.video-card-big > div.video-card-container > div > div > video\",modalShareInput:\".copy-link-container > input\",modalCaption:\"div.video-card-big > div.content-container > div.video-meta-info > h1\",modalSoundLink:\"div.content-container > div.video-meta-info > h2.music-info > a\",modalUploader:\".user-username\",videoPlayer:\"div.video-card-container > div > div > video\",videoShareInput:\"div.content-container.border > div.copy-link-container > input\",videoCaption:\"div.content-container.border > div.video-meta-info > h1\",videoSoundLink:\"div.content-container.border > div.video-meta-info > h2.music-info > a\",videoUploader:\".user-username\"},classes:{feedVideoItem:\"video-feed-item-wrapper\",modalCloseDisabled:\"disabled\",titleMessage:\"title\"},tags:{resultTag:\"video_urls\",resultParentTag:\"body\"},attributes:{src:\"src\"},tiktokMessages:[\"Couldn't find this account\",\"No videos yet\",\"Video currently unavailable\"]},currentState={preloadCount:0,finished:!1,limit:0},checkForErrors=function(){var e=document.getElementsByClassName(optStrings.classes.titleMessage);if(e&&e.length){var t=Array.from(e).find(e=>optStrings.tiktokMessages.includes(e.textContent)).textContent;if(t)return createVidUrlElement(\"ERR: \"+t),!0}return!1},createVidUrlElement=function(e){var t=document.createElement(optStrings.tags.resultTag);t.innerText=JSON.stringify(e),document.getElementsByTagName(optStrings.tags.resultParentTag)[0].appendChild(t),currentState.finished=!0},buldVidUrlArray=function(e){document.getElementsByClassName(optStrings.classes.feedVideoItem)[0].click();var 
t=[],r=window.setInterval(o=>{t.push(getCurrentModalVideo()),currentState.limit>0&&t.length>=currentState.limit&&(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t));var i=document.querySelectorAll(optStrings.selectors.modalArrowRight)[0];i.classList.contains(optStrings.classes.modalCloseDisabled)?(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t)):i.click()},20)},getCurrentModalVideo=function(){var e=document.querySelector(optStrings.selectors.modalPlayer).getAttribute(optStrings.attributes.src),t=document.querySelector(optStrings.selectors.modalShareInput).value,r=document.querySelector(optStrings.selectors.modalCaption).textContent,o=document.querySelector(optStrings.selectors.modalSoundLink),i=document.querySelector(optStrings.selectors.modalUploader).textContent,n=o.getAttribute(\"href\");return{url:e,shareLink:t,caption:r,uploader:i,sound:{title:o.text,link:n}}},getCurrentVideo=function(){if(!checkForErrors()){var e=document.querySelector(optStrings.selectors.videoPlayer).getAttribute(optStrings.attributes.src),t=document.querySelector(optStrings.selectors.videoShareInput).value,r=document.querySelector(optStrings.selectors.videoCaption).textContent,o=document.querySelector(optStrings.selectors.videoSoundLink),i=document.querySelector(optStrings.selectors.videoUploader).textContent,n=o.getAttribute(\"href\");return{url:e,shareLink:t,caption:r,uploader:i,sound:{title:o.text,link:n}}}},scrollBottom=()=>window.scrollTo(0,document.body.scrollHeight),scrollWhileNew=function(e){var t={count:0},r=window.setInterval(o=>{scrollBottom();var i=t.count;if(t.count=document.getElementsByClassName(optStrings.classes.feedVideoItem).length,currentState.limit>0&&(currentState.preloadCount>=currentState.limit||t.count>=currentState.limit)&&(e(createVidUrlElement),window.clearInterval(r)),checkForErrors())window.clearInterval(r);else 
if(0!=t.count)if(i!==t.count)currentState.preloadCount=t.count;else{if(document.querySelector(optStrings.selectors.feedLoading))return;window.clearInterval(r),e(createVidUrlElement)}},1e3)},bootstrapIteratingVideos=function(e){return currentState.limit=e,scrollWhileNew(buldVidUrlArray),\"bootstrapIteratingVideos\"},bootstrapGetCurrentVideo=function(){var e=getCurrentVideo();return createVidUrlElement(e),\"bootstrapGetCurrentVideo\"},init=()=>{const e=navigator.__proto__;return delete e.webdriver,navigator.__proto__=e,\"script initialized\"},init();"
-
 //ScraperPath -
 var ScraperPath = "scraper.js"
+
+//ScraperScript -
+var ScraperScript = "optStrings={selectors:{feedLoading:\".tiktok-ui-loading-container\",modalArrowRight:\"div > div.video-card-container > img.arrow-right\",modalClose:\"div > div.video-card-container > img.control-icon.close\",modalPlayer:\"div.video-card-container > div.video-card-browse > video\",modalCaption:\"div.content-container > div.video-infos-container > h1\",modalSoundLink:\"div.content-container > div.video-infos-container > h2.music-info > a\",modalUploader:\".user-username\",videoPlayer:\"div.video-card-container > div > video\",videoCaption:\"div.content-container > div.video-infos-container > h1\",videoSoundLink:\"div.content-container > div.video-infos-container > h2 > a\",videoUploader:\".user-username\"},classes:{feedVideoItem:\"video-feed-item-wrapper\",modalCloseDisabled:\"disabled\",titleMessage:\"title\"},tags:{resultTag:\"video_urls\",resultParentTag:\"body\"},attributes:{src:\"src\"},tiktokMessages:[\"Couldn't find this account\",\"No videos yet\",\"Video currently unavailable\"]},currentState={preloadCount:0,finished:!1,limit:0},checkForErrors=function(){var e=document.getElementsByClassName(optStrings.classes.titleMessage);if(e&&e.length){var t=Array.from(e).find(e=>optStrings.tiktokMessages.includes(e.textContent)).textContent;if(t)return createVidUrlElement(\"ERR: \"+t),!0}return!1},createVidUrlElement=function(e){var t=document.createElement(optStrings.tags.resultTag);t.innerText=JSON.stringify(e),document.getElementsByTagName(optStrings.tags.resultParentTag)[0].appendChild(t),currentState.finished=!0},buldVidUrlArray=function(e){document.getElementsByClassName(optStrings.classes.feedVideoItem)[0].click();var t=[],r=window.setInterval(o=>{t.push(getCurrentModalVideo()),currentState.limit>0&&t.length>=currentState.limit&&(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t));var 
n=document.querySelectorAll(optStrings.selectors.modalArrowRight)[0];!n||n.classList.contains(optStrings.classes.modalCloseDisabled)?(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t)):n.click()},20)},getCurrentModalVideo=function(){var e=document.querySelector(optStrings.selectors.modalPlayer).getAttribute(optStrings.attributes.src),t=window.location.href,r=document.querySelector(optStrings.selectors.modalCaption).textContent,o=document.querySelector(optStrings.selectors.modalSoundLink),n=document.querySelector(optStrings.selectors.modalUploader).textContent,i=o?o.getAttribute(\"href\"):\"\";return{url:e,shareLink:t,caption:r,uploader:n,sound:{title:o?o.text:\"\",link:i}}},getCurrentVideo=function(){if(!checkForErrors()){var e=document.querySelector(optStrings.selectors.videoPlayer).getAttribute(optStrings.attributes.src),t=window.location.href,r=document.querySelector(optStrings.selectors.videoCaption).textContent,o=document.querySelector(optStrings.selectors.videoSoundLink),n=document.querySelector(optStrings.selectors.videoUploader).textContent,i=o?o.getAttribute(\"href\"):\"\";return{url:e,shareLink:t,caption:r,uploader:n,sound:{title:o?o.text:\"\",link:i}}}},scrollBottom=()=>window.scrollTo(0,document.body.scrollHeight),scrollWhileNew=function(e){var t={count:0},r=window.setInterval(o=>{scrollBottom();var n=t.count;if(t.count=document.getElementsByClassName(optStrings.classes.feedVideoItem).length,currentState.limit>0&&(currentState.preloadCount>=currentState.limit||t.count>=currentState.limit)&&(e(createVidUrlElement),window.clearInterval(r)),checkForErrors())window.clearInterval(r);else if(0!=t.count)if(n!==t.count)currentState.preloadCount=t.count;else{if(isLoading())return;window.clearInterval(r),e(createVidUrlElement)}},1e3)},isLoading=function(){var e=document.querySelector(optStrings.selectors.feedLoading);return e&&0!=e.getClientRects().length},bootstrapIteratingVideos=function(e){return 
currentState.limit=e,scrollWhileNew(buldVidUrlArray),\"bootstrapIteratingVideos\"},bootstrapGetCurrentVideo=function(){var e=getCurrentVideo();return createVidUrlElement(e),\"bootstrapGetCurrentVideo\"},init=()=>{const e=navigator.__proto__;return delete e.webdriver,navigator.__proto__=e,\"script initialized\"},init();"
@@ -0,0 +1,39 @@
+package workflows
+
+import (
+	"fmt"
+	models "github.com/pikami/tiktok-dl/models"
+	config "github.com/pikami/tiktok-dl/models/config"
+	res "github.com/pikami/tiktok-dl/resources"
+	utils "github.com/pikami/tiktok-dl/utils"
+	fileio "github.com/pikami/tiktok-dl/utils/fileio"
+	log "github.com/pikami/tiktok-dl/utils/log"
+)
+
+// CanUseDownloadScrapedData - Reports whether DownloadScrapedData can be used, i.e. whether a non-empty scraped data file path was supplied
+func CanUseDownloadScrapedData(scrapedDataFilePath string) bool {
+	return scrapedDataFilePath != ""
+}
+
+// DownloadScrapedData - Download items from scraped data file: reads the file at scrapedDataFilePath, parses it into uploads and downloads each one into a per-uploader directory under config.Config.OutputPath
+func DownloadScrapedData(scrapedDataFilePath string) {
+	if !fileio.CheckIfExists(scrapedDataFilePath) {
+		log.LogFatal(res.ErrorPathNotFound, scrapedDataFilePath) // NOTE(review): presumably terminates the program - confirm in utils/log
+	}
+
+	dataFileContent := fileio.ReadFileToString(scrapedDataFilePath)
+	uploads := models.ParseUploads(dataFileContent) // NOTE(review): input is expected to be JSON per the scraped-data flag description - confirm parser behaviour on malformed input
+	uploads = utils.RemoveArchivedItems(uploads) // NOTE(review): presumably drops items already recorded in the archive file - confirm
+
+	uploadCount := len(uploads) // counted after the archive filter, so progress reflects only remaining items
+
+	for index, upload := range uploads {
+		username := utils.GetUsernameFromString(upload.Uploader)
+		downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username) // <output path>/<username>
+
+		fileio.InitOutputDirectory(downloadDir)
+		downloadVideo(upload, downloadDir)
+		log.Logf(res.Downloaded, index+1, uploadCount) // progress line using a 1-based position
+	}
+	log.Log()
+}