Added ability to download from scrapeData (json)

This commit is contained in:
Pijus Kamandulis 2020-08-16 18:56:30 +03:00
parent 7a25f521cb
commit e2e4ba0d4b
7 changed files with 156 additions and 93 deletions

View File

@ -34,7 +34,8 @@ var (
"ItemsFoundInArchive": "%d items, found in archive. Skipping...\n", "ItemsFoundInArchive": "%d items, found in archive. Skipping...\n",
"Downloaded": "\r[%d/%d] Downloaded", "Downloaded": "\r[%d/%d] Downloaded",
"UsageLine": "Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL\n" + "UsageLine": "Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL\n" +
" or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt", " or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt\n" +
" or: tiktok-dl [OPTIONS] -scraped-data path/to/data.json",
}, },
}, },
resource{ resource{
@ -49,6 +50,10 @@ var (
"BatchFlag": "batch-file", "BatchFlag": "batch-file",
"BatchDefault": "", "BatchDefault": "",
"BatchDescription": "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.", "BatchDescription": "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.",
// ScrapedData
"ScrapedDataFlag": "scraped-data",
"ScrapedDataDefault": "",
"ScrapedDataDescription": "Download videos from scrape file (json format)",
// Archive // Archive
"ArchiveFlag": "archive", "ArchiveFlag": "archive",
"ArchiveDefault": "", "ArchiveDefault": "",

View File

@ -9,6 +9,7 @@ func main() {
config.GetConfig() config.GetConfig()
url := config.Config.URL url := config.Config.URL
batchFilePath := config.Config.BatchFilePath batchFilePath := config.Config.BatchFilePath
scrapedDataFilePath := config.Config.ScrapedDataFilePath
// Batch file // Batch file
if workflows.CanUseDownloadBatchFile(batchFilePath) { if workflows.CanUseDownloadBatchFile(batchFilePath) {
@ -16,5 +17,11 @@ func main() {
return return
} }
// Scraped data file
if workflows.CanUseDownloadScrapedData(scrapedDataFilePath) {
workflows.DownloadScrapedData(scrapedDataFilePath)
return
}
workflows.StartWorkflowByParameter(url) workflows.StartWorkflowByParameter(url)
} }

View File

@ -11,23 +11,25 @@ import (
// Config - Runtime configuration // Config - Runtime configuration
var Config struct { var Config struct {
URL string URL string
OutputPath string OutputPath string
BatchFilePath string BatchFilePath string
ArchiveFilePath string ScrapedDataFilePath string
FailLogFilePath string ArchiveFilePath string
Debug bool FailLogFilePath string
MetaData bool Debug bool
Quiet bool MetaData bool
JSONOnly bool Quiet bool
Deadline int JSONOnly bool
Limit int Deadline int
Limit int
} }
// GetConfig - Returns Config object // GetConfig - Returns Config object
func GetConfig() { func GetConfig() {
outputPath := flag.String(res.OutputFlag, res.OutputDefault, res.OutputDescription) outputPath := flag.String(res.OutputFlag, res.OutputDefault, res.OutputDescription)
batchFilePath := flag.String(res.BatchFlag, res.BatchDefault, res.BatchDescription) batchFilePath := flag.String(res.BatchFlag, res.BatchDefault, res.BatchDescription)
scrapedDataFilePath := flag.String(res.ScrapedDataFlag, res.ScrapedDataDefault, res.ScrapedDataDescription)
archive := flag.String(res.ArchiveFlag, res.ArchiveDefault, res.ArchiveDescription) archive := flag.String(res.ArchiveFlag, res.ArchiveDefault, res.ArchiveDescription)
failLogPath := flag.String(res.FailLogFlag, res.FailLogDefault, res.FailLogDescription) failLogPath := flag.String(res.FailLogFlag, res.FailLogDefault, res.FailLogDescription)
debug := flag.Bool(res.DebugFlag, parseBool(res.DebugDefault), res.DebugDescription) debug := flag.Bool(res.DebugFlag, parseBool(res.DebugDefault), res.DebugDescription)
@ -39,7 +41,7 @@ func GetConfig() {
flag.Parse() flag.Parse()
args := flag.Args() args := flag.Args()
if len(args) < 1 && *batchFilePath == "" { if len(args) < 1 && *batchFilePath == "" && *scrapedDataFilePath == "" {
fmt.Println(res.UsageLine) fmt.Println(res.UsageLine)
os.Exit(2) os.Exit(2)
} }
@ -51,6 +53,7 @@ func GetConfig() {
} }
Config.OutputPath = *outputPath Config.OutputPath = *outputPath
Config.BatchFilePath = *batchFilePath Config.BatchFilePath = *batchFilePath
Config.ScrapedDataFilePath = *scrapedDataFilePath
Config.ArchiveFilePath = *archive Config.ArchiveFilePath = *archive
Config.FailLogFilePath = *failLogPath Config.FailLogFilePath = *failLogPath
Config.Debug = *debug Config.Debug = *debug

View File

@ -3,92 +3,101 @@
// Check `/generator/resources.go` to change generated content // Check `/generator/resources.go` to change generated content
package resources package resources
//BatchDescription - //FailLogDefault -
var BatchDescription = "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored." var FailLogDefault = ""
//LimitDefault -
var LimitDefault = "0"
//DebugDescription -
var DebugDescription = "Enables debug mode"
//QuietDescription - //QuietDescription -
var QuietDescription = "Suppress output" var QuietDescription = "Suppress output"
//MetadataDefault - //ScrapedDataDefault -
var MetadataDefault = "false" var ScrapedDataDefault = ""
//LimitDescription -
var LimitDescription = "Sets the videos count limit (useful when there too many videos from the user or by hashtag)"
//OutputDescription -
var OutputDescription = "Output path"
//BatchDefault -
var BatchDefault = ""
//FailLogDefault -
var FailLogDefault = ""
//MetadataFlag -
var MetadataFlag = "metadata"
//ArchiveFlag -
var ArchiveFlag = "archive"
//MetadataDescription -
var MetadataDescription = "Write video metadata to a .json file"
//QuietDefault -
var QuietDefault = "false"
//DeadlineDefault -
var DeadlineDefault = "1500"
//JsonFlag -
var JsonFlag = "json"
//JsonDefault -
var JsonDefault = "false"
//DeadlineDescription -
var DeadlineDescription = "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)"
//OutputFlag -
var OutputFlag = "output"
//OutputDefault -
var OutputDefault = "./downloads"
//FailLogFlag -
var FailLogFlag = "fail-log"
//DebugDefault -
var DebugDefault = "false"
//ArchiveDefault -
var ArchiveDefault = ""
//FailLogDescription -
var FailLogDescription = "Write failed items to log file"
//BatchFlag -
var BatchFlag = "batch-file"
//QuietFlag -
var QuietFlag = "quiet"
//DeadlineFlag -
var DeadlineFlag = "deadline"
//LimitFlag -
var LimitFlag = "limit"
//ArchiveDescription - //ArchiveDescription -
var ArchiveDescription = "Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it." var ArchiveDescription = "Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it."
//FailLogFlag -
var FailLogFlag = "fail-log"
//QuietDefault -
var QuietDefault = "false"
//JsonFlag -
var JsonFlag = "json"
//ScrapedDataFlag -
var ScrapedDataFlag = "scraped-data"
//DebugFlag - //DebugFlag -
var DebugFlag = "debug" var DebugFlag = "debug"
//MetadataDefault -
var MetadataDefault = "false"
//DeadlineFlag -
var DeadlineFlag = "deadline"
//OutputFlag -
var OutputFlag = "output"
//BatchFlag -
var BatchFlag = "batch-file"
//ArchiveFlag -
var ArchiveFlag = "archive"
//ArchiveDefault -
var ArchiveDefault = ""
//JsonDefault -
var JsonDefault = "false"
//JsonDescription - //JsonDescription -
var JsonDescription = "Just get JSON data from scraper (without video downloading)" var JsonDescription = "Just get JSON data from scraper (without video downloading)"
//DeadlineDescription -
var DeadlineDescription = "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)"
//OutputDefault -
var OutputDefault = "./downloads"
//OutputDescription -
var OutputDescription = "Output path"
//BatchDescription -
var BatchDescription = "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored."
//LimitFlag -
var LimitFlag = "limit"
//LimitDescription -
var LimitDescription = "Sets the videos count limit (useful when there too many videos from the user or by hashtag)"
//ScrapedDataDescription -
var ScrapedDataDescription = "Download videos from scrape file (json format)"
//DebugDescription -
var DebugDescription = "Enables debug mode"
//MetadataFlag -
var MetadataFlag = "metadata"
//BatchDefault -
var BatchDefault = ""
//MetadataDescription -
var MetadataDescription = "Write video metadata to a .json file"
//LimitDefault -
var LimitDefault = "0"
//DeadlineDefault -
var DeadlineDefault = "1500"
//FailLogDescription -
var FailLogDescription = "Write failed items to log file"
//DebugDefault -
var DebugDefault = "false"
//QuietFlag -
var QuietFlag = "quiet"

View File

@ -19,4 +19,4 @@ var ItemsFoundInArchive = "%d items, found in archive. Skipping...\n"
var Downloaded = "\r[%d/%d] Downloaded" var Downloaded = "\r[%d/%d] Downloaded"
//UsageLine - //UsageLine -
var UsageLine = "Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL\n or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt" var UsageLine = "Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL\n or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt\n or: tiktok-dl [OPTIONS] -scraped-data path/to/data.json"

View File

@ -3,8 +3,8 @@
// Check `/generator/resources.go` to change generated content // Check `/generator/resources.go` to change generated content
package resources package resources
//ScraperScript -
var ScraperScript = "optStrings={selectors:{feedLoading:\"div.tiktok-loading.feed-loading\",modalArrowRight:\"div.video-card-modal > div > img.arrow-right\",modalClose:\".video-card-modal > div > div.close\",modalPlayer:\"div > div > main > div.video-card-modal > div > div.video-card-big > div.video-card-container > div > div > video\",modalShareInput:\".copy-link-container > input\",modalCaption:\"div.video-card-big > div.content-container > div.video-meta-info > h1\",modalSoundLink:\"div.content-container > div.video-meta-info > h2.music-info > a\",modalUploader:\".user-username\",videoPlayer:\"div.video-card-container > div > div > video\",videoShareInput:\"div.content-container.border > div.copy-link-container > input\",videoCaption:\"div.content-container.border > div.video-meta-info > h1\",videoSoundLink:\"div.content-container.border > div.video-meta-info > h2.music-info > a\",videoUploader:\".user-username\"},classes:{feedVideoItem:\"video-feed-item-wrapper\",modalCloseDisabled:\"disabled\",titleMessage:\"title\"},tags:{resultTag:\"video_urls\",resultParentTag:\"body\"},attributes:{src:\"src\"},tiktokMessages:[\"Couldn't find this account\",\"No videos yet\",\"Video currently unavailable\"]},currentState={preloadCount:0,finished:!1,limit:0},checkForErrors=function(){var e=document.getElementsByClassName(optStrings.classes.titleMessage);if(e&&e.length){var t=Array.from(e).find(e=>optStrings.tiktokMessages.includes(e.textContent)).textContent;if(t)return createVidUrlElement(\"ERR: \"+t),!0}return!1},createVidUrlElement=function(e){var t=document.createElement(optStrings.tags.resultTag);t.innerText=JSON.stringify(e),document.getElementsByTagName(optStrings.tags.resultParentTag)[0].appendChild(t),currentState.finished=!0},buldVidUrlArray=function(e){document.getElementsByClassName(optStrings.classes.feedVideoItem)[0].click();var t=[],r=window.setInterval(o=>{t.push(getCurrentModalVideo()),currentState.limit>0&&t.length>=currentState.limit&&(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t));var i=document.querySelectorAll(optStrings.selectors.modalArrowRight)[0];i.classList.contains(optStrings.classes.modalCloseDisabled)?(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t)):i.click()},20)},getCurrentModalVideo=function(){var e=document.querySelector(optStrings.selectors.modalPlayer).getAttribute(optStrings.attributes.src),t=document.querySelector(optStrings.selectors.modalShareInput).value,r=document.querySelector(optStrings.selectors.modalCaption).textContent,o=document.querySelector(optStrings.selectors.modalSoundLink),i=document.querySelector(optStrings.selectors.modalUploader).textContent,n=o.getAttribute(\"href\");return{url:e,shareLink:t,caption:r,uploader:i,sound:{title:o.text,link:n}}},getCurrentVideo=function(){if(!checkForErrors()){var e=document.querySelector(optStrings.selectors.videoPlayer).getAttribute(optStrings.attributes.src),t=document.querySelector(optStrings.selectors.videoShareInput).value,r=document.querySelector(optStrings.selectors.videoCaption).textContent,o=document.querySelector(optStrings.selectors.videoSoundLink),i=document.querySelector(optStrings.selectors.videoUploader).textContent,n=o.getAttribute(\"href\");return{url:e,shareLink:t,caption:r,uploader:i,sound:{title:o.text,link:n}}}},scrollBottom=()=>window.scrollTo(0,document.body.scrollHeight),scrollWhileNew=function(e){var t={count:0},r=window.setInterval(o=>{scrollBottom();var i=t.count;if(t.count=document.getElementsByClassName(optStrings.classes.feedVideoItem).length,currentState.limit>0&&(currentState.preloadCount>=currentState.limit||t.count>=currentState.limit)&&(e(createVidUrlElement),window.clearInterval(r)),checkForErrors())window.clearInterval(r);else if(0!=t.count)if(i!==t.count)currentState.preloadCount=t.count;else{if(document.querySelector(optStrings.selectors.feedLoading))return;window.clearInterval(r),e(createVidUrlElement)}},1e3)},bootstrapIteratingVideos=function(e){return currentState.limit=e,scrollWhileNew(buldVidUrlArray),\"bootstrapIteratingVideos\"},bootstrapGetCurrentVideo=function(){var e=getCurrentVideo();return createVidUrlElement(e),\"bootstrapGetCurrentVideo\"},init=()=>{const e=navigator.__proto__;return delete e.webdriver,navigator.__proto__=e,\"script initialized\"},init();"
//ScraperPath - //ScraperPath -
var ScraperPath = "scraper.js" var ScraperPath = "scraper.js"
//ScraperScript -
var ScraperScript = "optStrings={selectors:{feedLoading:\".tiktok-ui-loading-container\",modalArrowRight:\"div > div.video-card-container > img.arrow-right\",modalClose:\"div > div.video-card-container > img.control-icon.close\",modalPlayer:\"div.video-card-container > div.video-card-browse > video\",modalCaption:\"div.content-container > div.video-infos-container > h1\",modalSoundLink:\"div.content-container > div.video-infos-container > h2.music-info > a\",modalUploader:\".user-username\",videoPlayer:\"div.video-card-container > div > video\",videoCaption:\"div.content-container > div.video-infos-container > h1\",videoSoundLink:\"div.content-container > div.video-infos-container > h2 > a\",videoUploader:\".user-username\"},classes:{feedVideoItem:\"video-feed-item-wrapper\",modalCloseDisabled:\"disabled\",titleMessage:\"title\"},tags:{resultTag:\"video_urls\",resultParentTag:\"body\"},attributes:{src:\"src\"},tiktokMessages:[\"Couldn't find this account\",\"No videos yet\",\"Video currently unavailable\"]},currentState={preloadCount:0,finished:!1,limit:0},checkForErrors=function(){var e=document.getElementsByClassName(optStrings.classes.titleMessage);if(e&&e.length){var t=Array.from(e).find(e=>optStrings.tiktokMessages.includes(e.textContent)).textContent;if(t)return createVidUrlElement(\"ERR: \"+t),!0}return!1},createVidUrlElement=function(e){var t=document.createElement(optStrings.tags.resultTag);t.innerText=JSON.stringify(e),document.getElementsByTagName(optStrings.tags.resultParentTag)[0].appendChild(t),currentState.finished=!0},buldVidUrlArray=function(e){document.getElementsByClassName(optStrings.classes.feedVideoItem)[0].click();var t=[],r=window.setInterval(o=>{t.push(getCurrentModalVideo()),currentState.limit>0&&t.length>=currentState.limit&&(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t));var n=document.querySelectorAll(optStrings.selectors.modalArrowRight)[0];!n||n.classList.contains(optStrings.classes.modalCloseDisabled)?(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t)):n.click()},20)},getCurrentModalVideo=function(){var e=document.querySelector(optStrings.selectors.modalPlayer).getAttribute(optStrings.attributes.src),t=window.location.href,r=document.querySelector(optStrings.selectors.modalCaption).textContent,o=document.querySelector(optStrings.selectors.modalSoundLink),n=document.querySelector(optStrings.selectors.modalUploader).textContent,i=o?o.getAttribute(\"href\"):\"\";return{url:e,shareLink:t,caption:r,uploader:n,sound:{title:o?o.text:\"\",link:i}}},getCurrentVideo=function(){if(!checkForErrors()){var e=document.querySelector(optStrings.selectors.videoPlayer).getAttribute(optStrings.attributes.src),t=window.location.href,r=document.querySelector(optStrings.selectors.videoCaption).textContent,o=document.querySelector(optStrings.selectors.videoSoundLink),n=document.querySelector(optStrings.selectors.videoUploader).textContent,i=o?o.getAttribute(\"href\"):\"\";return{url:e,shareLink:t,caption:r,uploader:n,sound:{title:o?o.text:\"\",link:i}}}},scrollBottom=()=>window.scrollTo(0,document.body.scrollHeight),scrollWhileNew=function(e){var t={count:0},r=window.setInterval(o=>{scrollBottom();var n=t.count;if(t.count=document.getElementsByClassName(optStrings.classes.feedVideoItem).length,currentState.limit>0&&(currentState.preloadCount>=currentState.limit||t.count>=currentState.limit)&&(e(createVidUrlElement),window.clearInterval(r)),checkForErrors())window.clearInterval(r);else if(0!=t.count)if(n!==t.count)currentState.preloadCount=t.count;else{if(isLoading())return;window.clearInterval(r),e(createVidUrlElement)}},1e3)},isLoading=function(){var e=document.querySelector(optStrings.selectors.feedLoading);return e&&0!=e.getClientRects().length},bootstrapIteratingVideos=function(e){return currentState.limit=e,scrollWhileNew(buldVidUrlArray),\"bootstrapIteratingVideos\"},bootstrapGetCurrentVideo=function(){var e=getCurrentVideo();return createVidUrlElement(e),\"bootstrapGetCurrentVideo\"},init=()=>{const e=navigator.__proto__;return delete e.webdriver,navigator.__proto__=e,\"script initialized\"},init();"

View File

@ -0,0 +1,39 @@
package workflows
import (
"fmt"
models "github.com/pikami/tiktok-dl/models"
config "github.com/pikami/tiktok-dl/models/config"
res "github.com/pikami/tiktok-dl/resources"
utils "github.com/pikami/tiktok-dl/utils"
fileio "github.com/pikami/tiktok-dl/utils/fileio"
log "github.com/pikami/tiktok-dl/utils/log"
)
// CanUseDownloadScrapedData - Check's if DownloadScrapedData can be used
func CanUseDownloadScrapedData(scrapedDataFilePath string) bool {
return scrapedDataFilePath != ""
}
// DownloadScrapedData - Download items from scraped data file
func DownloadScrapedData(scrapedDataFilePath string) {
if !fileio.CheckIfExists(scrapedDataFilePath) {
log.LogFatal(res.ErrorPathNotFound, scrapedDataFilePath)
}
dataFileContent := fileio.ReadFileToString(scrapedDataFilePath)
uploads := models.ParseUploads(dataFileContent)
uploads = utils.RemoveArchivedItems(uploads)
uploadCount := len(uploads)
for index, upload := range uploads {
username := utils.GetUsernameFromString(upload.Uploader)
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
fileio.InitOutputDirectory(downloadDir)
downloadVideo(upload, downloadDir)
log.Logf(res.Downloaded, index+1, uploadCount)
}
log.Log()
}