// LinkCheckService.java
package sk.iway.iwcm.editor.service;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import org.springframework.data.domain.Pageable;
import org.springframework.stereotype.Service;
import cvu.html.HTMLTokenizer;
import cvu.html.TagToken;
import sk.iway.iwcm.Constants;
import sk.iway.iwcm.FileTools;
import sk.iway.iwcm.Tools;
import sk.iway.iwcm.doc.DocDB;
import sk.iway.iwcm.doc.DocDetails;
import sk.iway.iwcm.doc.DocDetailsRepository;
import sk.iway.iwcm.doc.GroupDetails;
import sk.iway.iwcm.doc.GroupsDB;
import sk.iway.iwcm.editor.rest.LinkCheckDto;
import sk.iway.iwcm.editor.rest.LinkCheckErrorDto;
import sk.iway.iwcm.i18n.Prop;
/**
 * Functions and logic to prepare linkCheck arrays.
 *
 * <p>The service scans the HTML of web pages for {@code href} and {@code src}
 * attributes and reports targets that cannot be resolved to a page or a file,
 * targets that point to a page which is not available, and pages whose content
 * is shorter than the configured minimum.
 */
@Service
public class LinkCheckService {

    public LinkCheckService() {
        // stateless service, nothing to initialise
    }

    /** Enum of possible error types. */
    public enum ErrorTypes {
        PAGE_NOT_EXIST,
        DOC_NOT_AVAILABLE,
        FILE_NOT_EXIST,
        IMAGE_NOT_EXIST,
        LINK_NOT_EXIST
    }

    /**
     * Remove the query string and the fragment from the URL and drop the
     * {@code /thumb} segment if the URL contains {@code /thumb/}.
     *
     * @param url URL to clean, must not be null
     * @return URL cut at the first {@code ?} or {@code #}, whichever comes first
     */
    private String stripParameters(String url) {
        // Remove /thumb/
        // NOTE(review): the guard looks for "/thumb/" but the replacement removes
        // "/thumb" - presumably every occurrence, so "/thumbs" in the same URL would
        // be altered too; confirm against Tools.replace before changing.
        if (url.indexOf("/thumb/") != -1) {
            url = Tools.replace(url, "/thumb", "");
        }

        int queryStart = url.indexOf('?');
        int fragmentStart = url.indexOf('#');

        // Cut once at the earlier separator. Cutting twice with indexes computed
        // before the first cut would ask for a substring past the end of the
        // already shortened string whenever '?' precedes '#'.
        int cut;
        if (queryStart == -1) {
            cut = fragmentStart;
        } else if (fragmentStart == -1) {
            cut = queryStart;
        } else {
            cut = Math.min(queryStart, fragmentStart);
        }

        return cut == -1 ? url : url.substring(0, cut);
    }

    /**
     * Locale independent, case insensitive variant of {@link String#startsWith(String)}.
     *
     * @param value  tested string, must not be null
     * @param prefix expected prefix, must not be null
     * @return true if value starts with prefix, ignoring case
     */
    private static boolean startsWithIgnoreCase(String value, String prefix) {
        return value.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * @param url tested URL, must not be null
     * @return true if url starts with {@code http://} or {@code https://} (case insensitive)
     */
    private static boolean isExternalUrl(String url) {
        return startsWithIgnoreCase(url, "http://") || startsWithIgnoreCase(url, "https://");
    }

    /**
     * Load the page by id and record an error if it does not exist or is not available.
     *
     * @param docDB  DocDB instance
     * @param docId  id of the linked page
     * @param link   link text stored in the created error
     * @param errors list the error is appended to
     */
    private void addDocErrorIfAny(DocDB docDB, int docId, String link, List<LinkCheckErrorDto> errors) {
        DocDetails target = docDB.getBasicDocDetails(docId, false);
        if (target == null) {
            errors.add(new LinkCheckErrorDto(ErrorTypes.PAGE_NOT_EXIST, link));
        } else if (!target.isAvailable()) {
            errors.add(new LinkCheckErrorDto(ErrorTypes.DOC_NOT_AVAILABLE, link));
        }
    }

    /**
     * Check a single {@code href} value found in the page.
     *
     * @param href     value of the href attribute, must not be null
     * @param doc      page in which the link was found
     * @param docDB    DocDB instance
     * @param groupsDB GroupsDB instance
     * @param errors   list the found error is appended to
     */
    private void checkHref(String href, DocDetails doc, DocDB docDB, GroupsDB groupsDB, List<LinkCheckErrorDto> errors) {
        // Anchors, external links, javascript:, mailto: and the root URL are not verified
        if (href.startsWith("#")
                || isExternalUrl(href)
                || startsWithIgnoreCase(href, "javascript:")
                || startsWithIgnoreCase(href, "mailto:")
                || "/".equals(href)) {
            return;
        }

        if (href.startsWith("/showdoc.do")) {
            // Get docId param
            int docId = Tools.getIntValue(Tools.getParameterFromUrl(href, "docid"), -1);
            addDocErrorIfAny(docDB, docId, href, errors);
            return;
        }

        String path = stripParameters(href);

        // Check in DocDB, using the domain of the group the checked page belongs to
        String domain = null;
        GroupDetails group = groupsDB.getGroup(doc.getGroupId());
        if (group != null) {
            domain = group.getDomainName();
        }

        int docId = docDB.getVirtualPathDocId(path, domain);
        if (docId < 0) {
            // Not a known page path, so the link is valid only if it is a file
            if (!FileTools.isFile(path)) {
                errors.add(new LinkCheckErrorDto(isFileOrLink(path), path));
            }
        } else {
            addDocErrorIfAny(docDB, docId, path, errors);
        }
    }

    /**
     * Check doc and its links for errors. Return list of found errors.
     *
     * @param doc      doc to check
     * @param docDB    DocDB instance
     * @param groupsDB GroupsDB instance
     * @return List of LinkCheckErrorDto objects, every object represents one specific found error
     *         (never null; an empty List is returned if no error was found or the doc has no data)
     */
    private List<LinkCheckErrorDto> checkLinks(DocDetails doc, DocDB docDB, GroupsDB groupsDB) {
        List<LinkCheckErrorDto> errors = new ArrayList<>();

        String html = doc.getData();
        if (html == null) {
            // Nothing to parse. The caller iterates the result, so never hand back null.
            return errors;
        }

        // Self-closing tags are rewritten to plain tags before tokenizing
        HTMLTokenizer htmlTokenizer = new HTMLTokenizer(Tools.replace(html, "/>", ">").toCharArray());
        @SuppressWarnings("unchecked")
        Enumeration<Object> tokens = htmlTokenizer.getTokens();

        while (tokens.hasMoreElements()) {
            Object token = tokens.nextElement();
            if (!(token instanceof TagToken)) {
                continue;
            }

            TagToken tagToken = (TagToken) token;
            if (tagToken.getName() == null || tagToken.isEndTag()) {
                continue;
            }

            String href = tagToken.getAttribute("href");
            if (href != null) {
                checkHref(href, doc, docDB, groupsDB, errors);
            }

            String src = tagToken.getAttribute("src");
            if (Tools.isNotEmpty(src) && !isExternalUrl(src)) {
                String path = stripParameters(src);
                // Local resource must exist as a file
                if (!FileTools.isFile(path)) {
                    errors.add(new LinkCheckErrorDto(isFileOrLink(path), path));
                }
            }
        }

        return errors;
    }

    /**
     * Transform input DocDetails object to LinkCheckDto object.
     *
     * @param doc   DocDetails object to transform
     * @param error error text to be added into the LinkCheckDto
     * @param link  url to be added into the LinkCheckDto
     * @return new LinkCheckDto object created from entered params (if entered doc is null, null is returned)
     */
    private LinkCheckDto docDetailsToLinkCheck(DocDetails doc, String error, String link) {
        if (doc == null) {
            return null;
        }

        LinkCheckDto linkCheck = new LinkCheckDto();
        linkCheck.setId(doc.getId());
        linkCheck.setPage(doc.getFullPath());
        linkCheck.setError(error);
        linkCheck.setLink(link);
        return linkCheck;
    }

    /**
     * Get error specification text from translate key, based on input ErrorType.
     *
     * @param errorType represents which error specification text we want
     * @param prop      Prop instance
     * @return error specification text from translate key (if entered ErrorType is null
     *         or not handled here, return empty string "")
     */
    private String getErrorTextFromEnum(ErrorTypes errorType, Prop prop) {
        if (errorType == null) {
            // switch on a null enum would throw
            return "";
        }

        switch (errorType) {
            case DOC_NOT_AVAILABLE:
                return prop.getText("linkcheck.doc_not_available");
            case FILE_NOT_EXIST:
                return prop.getText("linkcheck.file_not_exist");
            case IMAGE_NOT_EXIST:
                return prop.getText("linkcheck.image_not_exist");
            case LINK_NOT_EXIST:
                return prop.getText("linkcheck.link_not_exist");
            case PAGE_NOT_EXIST:
                return prop.getText("linkcheck.page_not_exist");
            default:
                return "";
        }
    }

    /**
     * Get DocDetails under entered group and DocDetails of child groups. For every DocDetails run error test (link check).
     * If an error was found, create LinkCheckDto object that represents the specific error of the specific DocDetails and
     * put it into one of the Lists of errors. Return the specific list based on entered tableType param.
     *
     * <p>Only the first page of results with size 500 is requested from the repository.
     *
     * @param groupId              id of group, whose DocDetails will be checked (including every child group)
     * @param tableType            which one of assembled Lists of errors will be returned: {@code brokenLinks},
     *                             {@code hiddenPages} or {@code emptyPages} (for any other value, including null,
     *                             an empty List is returned)
     * @param docDetailsRepository DocDetailsRepository instance
     * @return List of LinkCheckDto objects, based on entered groupId and tableType choice
     */
    public List<LinkCheckDto> linkCheckList(int groupId, String tableType, DocDetailsRepository docDetailsRepository) {
        GroupsDB groupsDB = GroupsDB.getInstance();
        DocDB docDB = DocDB.getInstance();
        Prop prop = Prop.getInstance();

        List<LinkCheckDto> emptyPageList = new ArrayList<>();
        List<LinkCheckDto> brokenLinksList = new ArrayList<>();
        List<LinkCheckDto> hiddenPageList = new ArrayList<>();

        int linkCheckEmptyPageSize = Constants.getInt("linkCheckEmptyPageSize");

        // Find child groups and join their ids with commas
        StringBuilder groupIds = new StringBuilder();
        for (GroupDetails group : groupsDB.getGroupsTree(groupId, true, true)) {
            if (group == null) {
                continue;
            }
            if (groupIds.length() > 0) {
                groupIds.append(',');
            }
            groupIds.append(group.getGroupId());
        }
        // null (not "") is passed when no group was found
        String searchGroups = groupIds.length() == 0 ? null : groupIds.toString();

        List<DocDetails> possibleValues = docDetailsRepository
                .findAllByGroupIdIn(Tools.getTokensInt(searchGroups, ","), Pageable.ofSize(500))
                .getContent();

        for (DocDetails doc : possibleValues) {
            for (LinkCheckErrorDto error : checkLinks(doc, docDB, groupsDB)) {
                ErrorTypes errorType = error.getErrorType();
                LinkCheckDto row = docDetailsToLinkCheck(doc, getErrorTextFromEnum(errorType, prop), error.getLink());
                if (errorType == ErrorTypes.DOC_NOT_AVAILABLE) {
                    // The page has displaying disabled -> applies to links only:
                    // this page links to a page whose displaying is disabled
                    hiddenPageList.add(row);
                } else {
                    brokenLinksList.add(row);
                }
            }

            // Skip pages with an external link set
            String data = doc.getData();
            if (Tools.isEmpty(doc.getExternalLink()) && (data == null || data.length() < linkCheckEmptyPageSize)) {
                String errorText = prop.getText("linkcheck.page_probably_empty");
                if (data != null) {
                    errorText = errorText + ": " + data.length() + " " + prop.getText("linkcheck.chars");
                }
                emptyPageList.add(docDetailsToLinkCheck(doc, errorText, doc.getVirtualPath()));
            }
        }

        // Constant-first comparison keeps a null tableType from throwing
        if ("brokenLinks".equals(tableType)) {
            return brokenLinksList;
        }
        if ("hiddenPages".equals(tableType)) {
            return hiddenPageList;
        }
        if ("emptyPages".equals(tableType)) {
            return emptyPageList;
        }
        return new ArrayList<>();
    }

    /**
     * By href link determines if it's probably an image, a file or a general link.
     *
     * @param href - URL path
     * @return IMAGE_NOT_EXIST for paths starting with {@code /images} or {@code /shared/images},
     *         FILE_NOT_EXIST for paths starting with {@code /files} or {@code /shared},
     *         LINK_NOT_EXIST otherwise (also for null)
     */
    private ErrorTypes isFileOrLink(String href) {
        if (href == null) {
            return ErrorTypes.LINK_NOT_EXIST;
        }
        // The image prefixes are tested first because "/shared/images" also starts with "/shared"
        if (href.startsWith("/images") || href.startsWith("/shared/images")) {
            return ErrorTypes.IMAGE_NOT_EXIST;
        }
        if (href.startsWith("/files") || href.startsWith("/shared")) {
            return ErrorTypes.FILE_NOT_EXIST;
        }
        return ErrorTypes.LINK_NOT_EXIST;
    }
}