gpt4 book ai didi

javascript - 使用 Google Apps Script 从 Google Slides 演示文稿中提取所有 URL 链接

转载 作者:行者123 更新时间:2023-12-04 07:14:58 26 4
gpt4 key购买 nike

我正在尝试创建一个函数:传入 Google 幻灯片演示文稿 ID 后,该函数会解析演示文稿,并将找到的所有 URL 链接写入 Google 表格。我已经基于 @Yuval 的 this answer 改写出了下面的函数,它可以对 Google Docs 文档输入执行同样的操作:

/**
 * Finds every hyperlink in a Google Docs document, records each one in the
 * "Extracted Links" sheet of the active spreadsheet and returns descriptors
 * for all links found.
 *
 * Each descriptor has the keys `section`, `isFirstPageSection`, `paragraph`,
 * `textEl`, `startOffset`, `endOffsetInclusive` and `url`.
 * `endOffsetInclusive` is `null` when the link sits in the last styling run
 * of its text element.
 *
 * For every descriptor one row `[document name, url]` is written below the
 * last used row of the sheet, and both values are logged via `Logger.log`.
 * All rows are written with a single `setValues` call after the scan instead
 * of one `getLastRow`/`setValue` round trip per cell.
 *
 * @param {string} docId ID of the document, passed to `DocumentApp.openById`.
 * @param {boolean} mergeAdjacent When truthy, consecutive styling runs of the
 *     same text element that carry the same URL are reported as one link.
 * @returns {Object[]} The link descriptors, in the order they were found.
 * @throws {Error} If a section has no `getParagraphs` method, or if links
 *     were found but the "Extracted Links" sheet does not exist.
 */
function getAllLinks(docId, mergeAdjacent) {
  const doc = DocumentApp.openById(docId);
  const parentDocName = doc.getName();
  const links = [];
  // One [document name, url] pair per reported link; flushed in one write.
  const rows = [];

  iterateSections(doc, (section, sectionIndex, isFirstPageSection) => {
    if (!('getParagraphs' in section)) {
      // The section list comes from an undocumented API; fail loudly rather
      // than with a cryptic message if that API changes.
      throw new Error(
        'An API change has caused this script to stop working.\n' +
          `Section #${sectionIndex} of type ${section.getType()}` +
          ' has no .getParagraphs() method. Stopping script.'
      );
    }

    section.getParagraphs().forEach((par) => {
      // Skip empty paragraphs.
      if (par.getNumChildren() === 0) {
        return;
      }

      // Walk all text elements of the paragraph / list item.
      for (let el = par.getChild(0); el != null; el = el.getNextSibling()) {
        // NOTE(review): loose equality kept from the original for the
        // host-provided enum value; confirm before switching to ===.
        if (el.getType() != DocumentApp.ElementType.TEXT) {
          continue;
        }

        // Each index marks the start of a run with uniform styling.
        const attributeIndices = el.getTextAttributeIndices();
        let lastLink = null;

        attributeIndices.forEach((startOffset, i) => {
          const url = el.getLinkUrl(startOffset);
          if (url == null) {
            return;
          }

          // A run ends right before the next one starts; the end of the
          // last run is left as null.
          const endOffsetInclusive =
            i + 1 < attributeIndices.length ? attributeIndices[i + 1] - 1 : null;

          const continuesLastLink =
            lastLink !== null &&
            lastLink.url === url &&
            lastLink.endOffsetInclusive === startOffset - 1;
          if (mergeAdjacent && continuesLastLink) {
            // Same URL directly after the previous run: extend that link.
            lastLink.endOffsetInclusive = endOffsetInclusive;
            return;
          }

          lastLink = {
            section: section,
            isFirstPageSection: isFirstPageSection,
            paragraph: par,
            textEl: el,
            startOffset: startOffset,
            endOffsetInclusive: endOffsetInclusive,
            url: url,
          };
          rows.push([parentDocName, url]);
          Logger.log(parentDocName);
          Logger.log(url);
          links.push(lastLink);
        });
      }
    });
  });

  // The sheet is only required when there is something to write.
  if (rows.length > 0) {
    const spreadsheet = SpreadsheetApp.getActive();
    const sheet =
      spreadsheet == null ? null : spreadsheet.getSheetByName('Extracted Links');
    if (sheet == null) {
      throw new Error(
        'Sheet "Extracted Links" was not found in the active spreadsheet.'
      );
    }
    sheet.getRange(sheet.getLastRow() + 1, 1, rows.length, 2).setValues(rows);
  }

  return links;
}

/**
 * Calls the given function once for each section of the document (body,
 * header, footer, ...). Sections are the children of the element returned by
 * `doc.getBody().getParent()`.
 *
 * A section is reported as a first-page section when it is a header or
 * footer section but is not the one returned by `doc.getHeader()` /
 * `doc.getFooter()`.
 *
 * @param {Document} doc The Document object (such as the one obtained via
 *     `DocumentApp.getActiveDocument()`) whose sections are visited.
 * @param {Function} func Callback invoked for each section, in child order,
 *     with the following arguments:
 *     - {ContainerElement} section - the section element
 *     - {Number} sectionIndex - the child index of the section, such that
 *       `doc.getBody().getParent().getChild(sectionIndex) == section`
 *     - {Boolean} isFirstPageSection - whether the section is a first-page
 *       header/footer section
 */
function iterateSections(doc, func) {
  // The parent of the body exposes all sections as children; this relies on
  // undocumented API.
  const docEl = doc.getBody().getParent();

  // Fetch each regular header/footer once and remember where it sits among
  // the children (-1 when the document has none).
  const header = doc.getHeader();
  const footer = doc.getFooter();
  const regularHeaderSectionIndex =
    header == null ? -1 : docEl.getChildIndex(header);
  const regularFooterSectionIndex =
    footer == null ? -1 : docEl.getChildIndex(footer);

  for (let i = 0; i < docEl.getNumChildren(); ++i) {
    const section = docEl.getChild(i);
    const sectionType = section.getType();

    // NOTE(review): loose equality kept from the original for the
    // host-provided enum values; confirm before switching to ===.
    const isHeaderOrFooter =
      sectionType == DocumentApp.ElementType.HEADER_SECTION ||
      sectionType == DocumentApp.ElementType.FOOTER_SECTION;
    const isFirstPageSection =
      i !== regularHeaderSectionIndex &&
      i !== regularFooterSectionIndex &&
      isHeaderOrFooter;

    func(section, i, isFirstPageSection);
  }
}
当我尝试为 Google 幻灯片演示文稿输入实现同样的功能时,我卡在了如何解析文档并提取所有文本片段(以检查其中的链接)这一步。看来我需要先使用 getSlides(),再使用 getPageElements() 并遍历这些元素,但我不清楚如何获取幻灯片上的实际文本。任何关于如何遍历幻灯片上的实际文本的提示(以及如何从该文本中提取链接 URL,如果有的话)都将不胜感激。谢谢!

最佳答案

如果您只是想从幻灯片中获取链接,请参阅下面的代码:
代码:

/**
 * Collects the hyperlinks attached to text inside the shapes of a Google
 * Slides presentation.
 *
 * Every link target found is logged via `Logger.log`, exactly as before; in
 * addition, the non-null URLs are returned so callers can process them
 * (for example, write them to a spreadsheet).
 *
 * Only text inside shapes is inspected. Plain-text URLs that are not
 * hyperlinked are not detected.
 *
 * @param {string} [presentationId] ID of the presentation to scan. When it
 *     is omitted, empty or not a string (e.g. an event object passed by a
 *     trigger), the active presentation is used.
 * @returns {string[]} The URLs found, in slide / shape / link order.
 */
function getLinksFromSlides(presentationId) {
  const hasId = typeof presentationId === 'string' && presentationId !== '';
  const presentation = hasId
    ? SlidesApp.openById(presentationId)
    : SlidesApp.getActivePresentation();

  const urls = [];
  // Walk every slide, every shape on it, and every linked range in its text.
  presentation.getSlides().forEach((slide) => {
    slide.getShapes().forEach((shape) => {
      shape
        .getText()
        .getLinks()
        .forEach((linkRange) => {
          const url = linkRange.getTextStyle().getLink().getUrl();
          Logger.log(url);
          // getUrl() can yield null (presumably for links that do not point
          // at a web page, e.g. links to another slide); keep those out of
          // the returned list.
          if (url != null) {
            urls.push(url);
          }
        });
    });
  });

  return urls;
}
样本:
sample
输出:
output
笔记:
  • 这只会提取超链接。它不会提取任何非超链接的链接/url,如示例数据中所示。 (例如 https://www.facebook.com)
  • 如果您想要非超链接网址,那么您可能必须尝试正则表达式。
  • 关于javascript - 使用 Google Apps Script 从 Google Slides 演示文稿中提取所有 URL 链接,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/68821925/

    26 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com