Added ability to download from scrapeData (json)

This commit is contained in:
Pijus Kamandulis 2020-08-16 18:56:30 +03:00
parent 7a25f521cb
commit e2e4ba0d4b
7 changed files with 156 additions and 93 deletions

View File

@ -34,7 +34,8 @@ var (
"ItemsFoundInArchive": "%d items, found in archive. Skipping...\n",
"Downloaded": "\r[%d/%d] Downloaded",
"UsageLine": "Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL\n" +
" or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt",
" or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt\n" +
" or: tiktok-dl [OPTIONS] -scraped-data path/to/data.json",
},
},
resource{
@ -49,6 +50,10 @@ var (
"BatchFlag": "batch-file",
"BatchDefault": "",
"BatchDescription": "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.",
// ScrapedData
"ScrapedDataFlag": "scraped-data",
"ScrapedDataDefault": "",
"ScrapedDataDescription": "Download videos from scrape file (json format)",
// Archive
"ArchiveFlag": "archive",
"ArchiveDefault": "",

View File

@ -9,6 +9,7 @@ func main() {
config.GetConfig()
url := config.Config.URL
batchFilePath := config.Config.BatchFilePath
scrapedDataFilePath := config.Config.ScrapedDataFilePath
// Batch file
if workflows.CanUseDownloadBatchFile(batchFilePath) {
@ -16,5 +17,11 @@ func main() {
return
}
// Scraped data file
if workflows.CanUseDownloadScrapedData(scrapedDataFilePath) {
workflows.DownloadScrapedData(scrapedDataFilePath)
return
}
workflows.StartWorkflowByParameter(url)
}

View File

@ -11,23 +11,25 @@ import (
// Config - Runtime configuration
var Config struct {
URL string
OutputPath string
BatchFilePath string
ArchiveFilePath string
FailLogFilePath string
Debug bool
MetaData bool
Quiet bool
JSONOnly bool
Deadline int
Limit int
URL string
OutputPath string
BatchFilePath string
ScrapedDataFilePath string
ArchiveFilePath string
FailLogFilePath string
Debug bool
MetaData bool
Quiet bool
JSONOnly bool
Deadline int
Limit int
}
// GetConfig - Parses command-line flags and populates the package-level Config
func GetConfig() {
outputPath := flag.String(res.OutputFlag, res.OutputDefault, res.OutputDescription)
batchFilePath := flag.String(res.BatchFlag, res.BatchDefault, res.BatchDescription)
scrapedDataFilePath := flag.String(res.ScrapedDataFlag, res.ScrapedDataDefault, res.ScrapedDataDescription)
archive := flag.String(res.ArchiveFlag, res.ArchiveDefault, res.ArchiveDescription)
failLogPath := flag.String(res.FailLogFlag, res.FailLogDefault, res.FailLogDescription)
debug := flag.Bool(res.DebugFlag, parseBool(res.DebugDefault), res.DebugDescription)
@ -39,7 +41,7 @@ func GetConfig() {
flag.Parse()
args := flag.Args()
if len(args) < 1 && *batchFilePath == "" {
if len(args) < 1 && *batchFilePath == "" && *scrapedDataFilePath == "" {
fmt.Println(res.UsageLine)
os.Exit(2)
}
@ -51,6 +53,7 @@ func GetConfig() {
}
Config.OutputPath = *outputPath
Config.BatchFilePath = *batchFilePath
Config.ScrapedDataFilePath = *scrapedDataFilePath
Config.ArchiveFilePath = *archive
Config.FailLogFilePath = *failLogPath
Config.Debug = *debug

View File

@ -3,92 +3,101 @@
// Check `/generator/resources.go` to change generated content
package resources
//BatchDescription -
var BatchDescription = "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored."
//LimitDefault -
var LimitDefault = "0"
//DebugDescription -
var DebugDescription = "Enables debug mode"
//FailLogDefault -
var FailLogDefault = ""
//QuietDescription -
var QuietDescription = "Suppress output"
//MetadataDefault -
var MetadataDefault = "false"
//LimitDescription -
var LimitDescription = "Sets the videos count limit (useful when there too many videos from the user or by hashtag)"
//OutputDescription -
var OutputDescription = "Output path"
//BatchDefault -
var BatchDefault = ""
//FailLogDefault -
var FailLogDefault = ""
//MetadataFlag -
var MetadataFlag = "metadata"
//ArchiveFlag -
var ArchiveFlag = "archive"
//MetadataDescription -
var MetadataDescription = "Write video metadata to a .json file"
//QuietDefault -
var QuietDefault = "false"
//DeadlineDefault -
var DeadlineDefault = "1500"
//JsonFlag -
var JsonFlag = "json"
//JsonDefault -
var JsonDefault = "false"
//DeadlineDescription -
var DeadlineDescription = "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)"
//OutputFlag -
var OutputFlag = "output"
//OutputDefault -
var OutputDefault = "./downloads"
//FailLogFlag -
var FailLogFlag = "fail-log"
//DebugDefault -
var DebugDefault = "false"
//ArchiveDefault -
var ArchiveDefault = ""
//FailLogDescription -
var FailLogDescription = "Write failed items to log file"
//BatchFlag -
var BatchFlag = "batch-file"
//QuietFlag -
var QuietFlag = "quiet"
//DeadlineFlag -
var DeadlineFlag = "deadline"
//LimitFlag -
var LimitFlag = "limit"
//ScrapedDataDefault -
var ScrapedDataDefault = ""
//ArchiveDescription -
var ArchiveDescription = "Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it."
//FailLogFlag -
var FailLogFlag = "fail-log"
//QuietDefault -
var QuietDefault = "false"
//JsonFlag -
var JsonFlag = "json"
//ScrapedDataFlag -
var ScrapedDataFlag = "scraped-data"
//DebugFlag -
var DebugFlag = "debug"
//MetadataDefault -
var MetadataDefault = "false"
//DeadlineFlag -
var DeadlineFlag = "deadline"
//OutputFlag -
var OutputFlag = "output"
//BatchFlag -
var BatchFlag = "batch-file"
//ArchiveFlag -
var ArchiveFlag = "archive"
//ArchiveDefault -
var ArchiveDefault = ""
//JsonDefault -
var JsonDefault = "false"
//JsonDescription -
var JsonDescription = "Just get JSON data from scraper (without video downloading)"
//DeadlineDescription -
var DeadlineDescription = "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)"
//OutputDefault -
var OutputDefault = "./downloads"
//OutputDescription -
var OutputDescription = "Output path"
//BatchDescription -
var BatchDescription = "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored."
//LimitFlag -
var LimitFlag = "limit"
//LimitDescription -
var LimitDescription = "Sets the videos count limit (useful when there too many videos from the user or by hashtag)"
//ScrapedDataDescription -
var ScrapedDataDescription = "Download videos from scrape file (json format)"
//DebugDescription -
var DebugDescription = "Enables debug mode"
//MetadataFlag -
var MetadataFlag = "metadata"
//BatchDefault -
var BatchDefault = ""
//MetadataDescription -
var MetadataDescription = "Write video metadata to a .json file"
//LimitDefault -
var LimitDefault = "0"
//DeadlineDefault -
var DeadlineDefault = "1500"
//FailLogDescription -
var FailLogDescription = "Write failed items to log file"
//DebugDefault -
var DebugDefault = "false"
//QuietFlag -
var QuietFlag = "quiet"

View File

@ -19,4 +19,4 @@ var ItemsFoundInArchive = "%d items, found in archive. Skipping...\n"
var Downloaded = "\r[%d/%d] Downloaded"
//UsageLine -
var UsageLine = "Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL\n or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt"
var UsageLine = "Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL\n or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt\n or: tiktok-dl [OPTIONS] -scraped-data path/to/data.json"

View File

@ -3,8 +3,8 @@
// Check `/generator/resources.go` to change generated content
package resources
//ScraperScript -
var ScraperScript = "optStrings={selectors:{feedLoading:\"div.tiktok-loading.feed-loading\",modalArrowRight:\"div.video-card-modal > div > img.arrow-right\",modalClose:\".video-card-modal > div > div.close\",modalPlayer:\"div > div > main > div.video-card-modal > div > div.video-card-big > div.video-card-container > div > div > video\",modalShareInput:\".copy-link-container > input\",modalCaption:\"div.video-card-big > div.content-container > div.video-meta-info > h1\",modalSoundLink:\"div.content-container > div.video-meta-info > h2.music-info > a\",modalUploader:\".user-username\",videoPlayer:\"div.video-card-container > div > div > video\",videoShareInput:\"div.content-container.border > div.copy-link-container > input\",videoCaption:\"div.content-container.border > div.video-meta-info > h1\",videoSoundLink:\"div.content-container.border > div.video-meta-info > h2.music-info > a\",videoUploader:\".user-username\"},classes:{feedVideoItem:\"video-feed-item-wrapper\",modalCloseDisabled:\"disabled\",titleMessage:\"title\"},tags:{resultTag:\"video_urls\",resultParentTag:\"body\"},attributes:{src:\"src\"},tiktokMessages:[\"Couldn't find this account\",\"No videos yet\",\"Video currently unavailable\"]},currentState={preloadCount:0,finished:!1,limit:0},checkForErrors=function(){var e=document.getElementsByClassName(optStrings.classes.titleMessage);if(e&&e.length){var t=Array.from(e).find(e=>optStrings.tiktokMessages.includes(e.textContent)).textContent;if(t)return createVidUrlElement(\"ERR: \"+t),!0}return!1},createVidUrlElement=function(e){var t=document.createElement(optStrings.tags.resultTag);t.innerText=JSON.stringify(e),document.getElementsByTagName(optStrings.tags.resultParentTag)[0].appendChild(t),currentState.finished=!0},buldVidUrlArray=function(e){document.getElementsByClassName(optStrings.classes.feedVideoItem)[0].click();var 
t=[],r=window.setInterval(o=>{t.push(getCurrentModalVideo()),currentState.limit>0&&t.length>=currentState.limit&&(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t));var i=document.querySelectorAll(optStrings.selectors.modalArrowRight)[0];i.classList.contains(optStrings.classes.modalCloseDisabled)?(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t)):i.click()},20)},getCurrentModalVideo=function(){var e=document.querySelector(optStrings.selectors.modalPlayer).getAttribute(optStrings.attributes.src),t=document.querySelector(optStrings.selectors.modalShareInput).value,r=document.querySelector(optStrings.selectors.modalCaption).textContent,o=document.querySelector(optStrings.selectors.modalSoundLink),i=document.querySelector(optStrings.selectors.modalUploader).textContent,n=o.getAttribute(\"href\");return{url:e,shareLink:t,caption:r,uploader:i,sound:{title:o.text,link:n}}},getCurrentVideo=function(){if(!checkForErrors()){var e=document.querySelector(optStrings.selectors.videoPlayer).getAttribute(optStrings.attributes.src),t=document.querySelector(optStrings.selectors.videoShareInput).value,r=document.querySelector(optStrings.selectors.videoCaption).textContent,o=document.querySelector(optStrings.selectors.videoSoundLink),i=document.querySelector(optStrings.selectors.videoUploader).textContent,n=o.getAttribute(\"href\");return{url:e,shareLink:t,caption:r,uploader:i,sound:{title:o.text,link:n}}}},scrollBottom=()=>window.scrollTo(0,document.body.scrollHeight),scrollWhileNew=function(e){var t={count:0},r=window.setInterval(o=>{scrollBottom();var i=t.count;if(t.count=document.getElementsByClassName(optStrings.classes.feedVideoItem).length,currentState.limit>0&&(currentState.preloadCount>=currentState.limit||t.count>=currentState.limit)&&(e(createVidUrlElement),window.clearInterval(r)),checkForErrors())window.clearInterval(r);else 
if(0!=t.count)if(i!==t.count)currentState.preloadCount=t.count;else{if(document.querySelector(optStrings.selectors.feedLoading))return;window.clearInterval(r),e(createVidUrlElement)}},1e3)},bootstrapIteratingVideos=function(e){return currentState.limit=e,scrollWhileNew(buldVidUrlArray),\"bootstrapIteratingVideos\"},bootstrapGetCurrentVideo=function(){var e=getCurrentVideo();return createVidUrlElement(e),\"bootstrapGetCurrentVideo\"},init=()=>{const e=navigator.__proto__;return delete e.webdriver,navigator.__proto__=e,\"script initialized\"},init();"
//ScraperPath -
var ScraperPath = "scraper.js"
//ScraperScript -
var ScraperScript = "optStrings={selectors:{feedLoading:\".tiktok-ui-loading-container\",modalArrowRight:\"div > div.video-card-container > img.arrow-right\",modalClose:\"div > div.video-card-container > img.control-icon.close\",modalPlayer:\"div.video-card-container > div.video-card-browse > video\",modalCaption:\"div.content-container > div.video-infos-container > h1\",modalSoundLink:\"div.content-container > div.video-infos-container > h2.music-info > a\",modalUploader:\".user-username\",videoPlayer:\"div.video-card-container > div > video\",videoCaption:\"div.content-container > div.video-infos-container > h1\",videoSoundLink:\"div.content-container > div.video-infos-container > h2 > a\",videoUploader:\".user-username\"},classes:{feedVideoItem:\"video-feed-item-wrapper\",modalCloseDisabled:\"disabled\",titleMessage:\"title\"},tags:{resultTag:\"video_urls\",resultParentTag:\"body\"},attributes:{src:\"src\"},tiktokMessages:[\"Couldn't find this account\",\"No videos yet\",\"Video currently unavailable\"]},currentState={preloadCount:0,finished:!1,limit:0},checkForErrors=function(){var e=document.getElementsByClassName(optStrings.classes.titleMessage);if(e&&e.length){var t=Array.from(e).find(e=>optStrings.tiktokMessages.includes(e.textContent)).textContent;if(t)return createVidUrlElement(\"ERR: \"+t),!0}return!1},createVidUrlElement=function(e){var t=document.createElement(optStrings.tags.resultTag);t.innerText=JSON.stringify(e),document.getElementsByTagName(optStrings.tags.resultParentTag)[0].appendChild(t),currentState.finished=!0},buldVidUrlArray=function(e){document.getElementsByClassName(optStrings.classes.feedVideoItem)[0].click();var t=[],r=window.setInterval(o=>{t.push(getCurrentModalVideo()),currentState.limit>0&&t.length>=currentState.limit&&(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t));var 
n=document.querySelectorAll(optStrings.selectors.modalArrowRight)[0];!n||n.classList.contains(optStrings.classes.modalCloseDisabled)?(window.clearInterval(r),document.querySelector(optStrings.selectors.modalClose).click(),e(t)):n.click()},20)},getCurrentModalVideo=function(){var e=document.querySelector(optStrings.selectors.modalPlayer).getAttribute(optStrings.attributes.src),t=window.location.href,r=document.querySelector(optStrings.selectors.modalCaption).textContent,o=document.querySelector(optStrings.selectors.modalSoundLink),n=document.querySelector(optStrings.selectors.modalUploader).textContent,i=o?o.getAttribute(\"href\"):\"\";return{url:e,shareLink:t,caption:r,uploader:n,sound:{title:o?o.text:\"\",link:i}}},getCurrentVideo=function(){if(!checkForErrors()){var e=document.querySelector(optStrings.selectors.videoPlayer).getAttribute(optStrings.attributes.src),t=window.location.href,r=document.querySelector(optStrings.selectors.videoCaption).textContent,o=document.querySelector(optStrings.selectors.videoSoundLink),n=document.querySelector(optStrings.selectors.videoUploader).textContent,i=o?o.getAttribute(\"href\"):\"\";return{url:e,shareLink:t,caption:r,uploader:n,sound:{title:o?o.text:\"\",link:i}}}},scrollBottom=()=>window.scrollTo(0,document.body.scrollHeight),scrollWhileNew=function(e){var t={count:0},r=window.setInterval(o=>{scrollBottom();var n=t.count;if(t.count=document.getElementsByClassName(optStrings.classes.feedVideoItem).length,currentState.limit>0&&(currentState.preloadCount>=currentState.limit||t.count>=currentState.limit)&&(e(createVidUrlElement),window.clearInterval(r)),checkForErrors())window.clearInterval(r);else if(0!=t.count)if(n!==t.count)currentState.preloadCount=t.count;else{if(isLoading())return;window.clearInterval(r),e(createVidUrlElement)}},1e3)},isLoading=function(){var e=document.querySelector(optStrings.selectors.feedLoading);return e&&0!=e.getClientRects().length},bootstrapIteratingVideos=function(e){return 
currentState.limit=e,scrollWhileNew(buldVidUrlArray),\"bootstrapIteratingVideos\"},bootstrapGetCurrentVideo=function(){var e=getCurrentVideo();return createVidUrlElement(e),\"bootstrapGetCurrentVideo\"},init=()=>{const e=navigator.__proto__;return delete e.webdriver,navigator.__proto__=e,\"script initialized\"},init();"

View File

@ -0,0 +1,39 @@
package workflows
import (
"fmt"
models "github.com/pikami/tiktok-dl/models"
config "github.com/pikami/tiktok-dl/models/config"
res "github.com/pikami/tiktok-dl/resources"
utils "github.com/pikami/tiktok-dl/utils"
fileio "github.com/pikami/tiktok-dl/utils/fileio"
log "github.com/pikami/tiktok-dl/utils/log"
)
// CanUseDownloadScrapedData - Checks if DownloadScrapedData can be used.
// Reports true when a scraped data file path was supplied, i.e. the path
// is not the empty string. The path is not validated here.
func CanUseDownloadScrapedData(scrapedDataFilePath string) bool {
	pathProvided := len(scrapedDataFilePath) > 0
	return pathProvided
}
// DownloadScrapedData - Download items from scraped data file.
//
// Reads the file at scrapedDataFilePath, parses it with models.ParseUploads,
// filters the result through utils.RemoveArchivedItems and downloads every
// remaining item into "<OutputPath>/<username>", logging progress after each
// item. A missing file is reported through log.LogFatal with
// res.ErrorPathNotFound.
func DownloadScrapedData(scrapedDataFilePath string) {
	if exists := fileio.CheckIfExists(scrapedDataFilePath); !exists {
		log.LogFatal(res.ErrorPathNotFound, scrapedDataFilePath)
	}

	// Load, parse and filter in one pipeline; the archive filter runs last.
	items := utils.RemoveArchivedItems(
		models.ParseUploads(fileio.ReadFileToString(scrapedDataFilePath)),
	)
	total := len(items)

	for i := range items {
		item := items[i]

		// Each uploader gets its own sub-directory below the output path.
		targetDir := fmt.Sprintf(
			"%s/%s",
			config.Config.OutputPath,
			utils.GetUsernameFromString(item.Uploader),
		)
		fileio.InitOutputDirectory(targetDir)

		downloadVideo(item, targetDir)
		log.Logf(res.Downloaded, i+1, total)
	}

	log.Log()
}