I wrote a JavaScript script that exports YouTube’s Watch Later list to CSV, so I’m sharing it here.
Feature: Save the video list from the Watch Later page as CSV in title,URL format.
Bug: The order is random.
Usage:
Go to the Watch Later page, open Chrome Developer Tools, and paste the following source code into the Console.
Note: Wait a few seconds before running the final line, console.save(csv,'result.csv'), on its own.
/**
 * Collects every node matched by an XPath expression into a plain array.
 *
 * @param {string} xpath - XPath expression to evaluate.
 * @param {Node} [parent] - Context node for the query; `document` is used
 *   when this argument is falsy.
 * @returns {Node[]} The matched nodes, in the order of the ordered snapshot.
 */
function getElementsByXPath(xpath, parent) {
  const contextNode = parent || document;
  const snapshot = document.evaluate(
    xpath,
    contextNode,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null
  );
  // The snapshot length is read once, then each item is copied out by index.
  return Array.from({ length: snapshot.snapshotLength }, (_, index) =>
    snapshot.snapshotItem(index)
  );
}
/**
 * Installs `console.save(data, filename)`, a helper that offers `data` as a
 * file download from the current page.
 *
 * Behaviour of `console.save`:
 * - Falsy `data` is rejected with a console error and nothing is downloaded.
 * - `filename` defaults to 'console.json'.
 * - Objects are serialised with `JSON.stringify` using a 4-space indent;
 *   anything else is handed to the Blob unchanged.
 * - The Blob's MIME type follows the filename extension (.csv, .txt) and
 *   falls back to 'text/json' for every other name.
 */
(function (console) {
  const MIME_BY_EXTENSION = new Map([
    ['csv', 'text/csv'],
    ['txt', 'text/plain'],
  ]);
  const FALLBACK_MIME = 'text/json';
  // Give the browser time to start the download before releasing the Blob URL.
  const REVOKE_DELAY_MS = 1000;

  console.save = function (data, filename) {
    if (!data) {
      console.error('Console.save: No data');
      return;
    }

    const name = filename || 'console.json';
    const payload =
      typeof data === 'object' ? JSON.stringify(data, undefined, 4) : data;

    const extension = String(name).split('.').pop().toLowerCase();
    const mimeType = MIME_BY_EXTENSION.get(extension) || FALLBACK_MIME;

    const blob = new Blob([payload], { type: mimeType });
    const objectUrl = window.URL.createObjectURL(blob);

    const anchor = document.createElement('a');
    anchor.download = name;
    anchor.href = objectUrl;
    anchor.dataset.downloadurl = [mimeType, anchor.download, anchor.href].join(':');

    // A plain click() replaces the deprecated createEvent/initMouseEvent pair.
    anchor.click();

    // Release the object URL so the Blob does not stay alive for the whole
    // lifetime of the page.
    setTimeout(function () {
      window.URL.revokeObjectURL(objectUrl);
    }, REVOKE_DELAY_MS);
  };
})(console);
/**
 * Turns a (possibly relative) URL into the value the browser reports for it
 * once assigned to a throw-away `<a>` element.
 *
 * @param {string} href - URL to convert.
 * @returns {string} The `href` property read back from the anchor.
 */
var absolutePath = function (href) {
  // Assigning and then reading `href` lets the anchor do the URL resolution.
  var anchor = Object.assign(document.createElement("a"), { href: href });
  return anchor.href;
};
// XPath selecting the link (<a>) element of every entry in the list.
var xpath = '//*[@id="content"]/a';

/**
 * Formats a value as one CSV field: wraps it in double quotes and doubles any
 * embedded double quote, so commas and quotes inside the value stay intact.
 *
 * @param {*} value - Value to write; it is converted with `String()`.
 * @returns {string} The quoted CSV field.
 */
function toCsvField(value) {
  return '"' + String(value).replace(/"/g, '""') + '"';
}

// Build one `"title","url"` row per link.
// - The result starts from an empty string, so it no longer begins with the
//   literal text "undefined".
// - Every matched link is visited exactly once, including the first one, and
//   the loop ends normally instead of running off the end of the list.
// - The title is looked up relative to its own link, so a title can never be
//   paired with the URL of a different entry.
// - Commas are kept as-is: the surrounding quotes already protect them, and
//   rewriting them would corrupt URLs.
var csv = getElementsByXPath(xpath)
  .filter((anchor) => anchor.getAttribute("href") !== null)
  .map((anchor) => {
    const titleNode = getElementsByXPath('./div/h3/span', anchor)[0];
    const title = (titleNode && titleNode.getAttribute("title")) || '';
    const url = absolutePath(anchor.getAttribute("href"));
    return toCsvField(title) + ',' + toCsvField(url) + "\n";
  })
  .join('');

console.save(csv, 'result.csv');
This JavaScript program retrieves elements based on a specific XPath path on a web page, extracts data from those elements, and saves it to a CSV file. Here’s a step-by-step explanation of the program.
- `getElementsByXPath` function:
  - A function that receives an XPath expression and a parent element, and retrieves the elements matching the specified XPath.
  - Uses `document.evaluate` to evaluate the XPath and retrieve the matching elements.
- `(function(console){...})(console)` part:
  - Code that extends `console` with a `console.save` function.
  - The `console.save` function is a custom function for saving data to a file.
- `absolutePath` function:
  - A function to convert relative URLs to absolute URLs.
  - Converts the given relative URL to an absolute URL using an `<a>` element and returns it.
- `xpath` variable:
  - Specifies the XPath expression. This XPath expression is used to extract the `<a>` elements on the page in a specific way.
- `csv` variable:
  - A variable to store the CSV data.
- `for` loop:
  - Keeps looping while `getElementsByXPath(xpath).length` is non-zero; in other words, the condition lets the loop run until the number of elements becomes 0.
- Processing inside the loop:
  - Uses `getElementsByXPath('//*[@id="content"]/a/div/h3/span')[i].getAttribute("title")` to get the title text from the specified XPath.
  - The retrieved text is added to the `csv` variable as a CSV cell separated by commas (`,`). The text is enclosed in double quotes.
  - Uses `getElementsByXPath(xpath)[i].getAttribute("href")` to get the relative URL of the link from the element’s “href” attribute.
  - Converts the relative URL to an absolute URL using the `absolutePath` function, encloses it in double quotes, and adds it to the `csv` variable.
  - Separates cells with commas and adds a newline character (`\n`) at the end of the row.
- `console.save(csv, 'result.csv')`:
  - Uses the `console.save` function to download the contents of the `csv` variable as a CSV file. The filename is ‘result.csv’. This function is custom-defined and serves the role of saving CSV data to a file.
This program retrieves titles and links from elements matching the specified XPath and saves them to a CSV file.