diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..744289d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+# Project exclude paths
+/target/
\ No newline at end of file
diff --git a/DomainSplit.iml b/DomainSplit.iml
new file mode 100644
index 0000000..78b2cc5
--- /dev/null
+++ b/DomainSplit.iml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4903572
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+# 简述
+一个从 [BE](https://github.com/ffffffff0x/BerylEnigma) 中独立出来的轮子,可以快速分割每一级的URL连接。
+
+# 使用方法
+
+```bash
+java -jar DomainSplit-1.0.jar -in [dir/text.txt] -out [dir/text.txt]
+```
+
+# 效果
+
+![](./assets/img/test1.png)
+![](./assets/img/test2.png)
\ No newline at end of file
diff --git a/assets/img/test1.png b/assets/img/test1.png
new file mode 100644
index 0000000..d301b24
Binary files /dev/null and b/assets/img/test1.png differ
diff --git a/assets/img/test2.png b/assets/img/test2.png
new file mode 100644
index 0000000..838bd27
Binary files /dev/null and b/assets/img/test2.png differ
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..3c189f8
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,48 @@
+
+
+ 4.0.0
+
+ org.ffffffff0x
+ DomainSplit
+ 1.0
+
+
+ 8
+ 8
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.8.1
+
+ 1.8
+ 1.8
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 1.2.1
+
+
+ package
+
+ shade
+
+
+
+
+ ffffffff0x.domainSplit.Main.Main
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/main/java/ffffffff0x/domainSplit/Main/CliController.java b/src/main/java/ffffffff0x/domainSplit/Main/CliController.java
new file mode 100644
index 0000000..5cd90ea
--- /dev/null
+++ b/src/main/java/ffffffff0x/domainSplit/Main/CliController.java
@@ -0,0 +1,120 @@
+package ffffffff0x.domainSplit.Main;
+
+import ffffffff0x.domainSplit.impl.DomainSplit;
+import ffffffff0x.domainSplit.impl.FileUtils;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+/**
+ * Command-line controller: reads URLs from a file, splits each one into its
+ * directory levels and writes the merged result to the output file.
+ *
+ * @author: RyuZUSUNC
+ * @create: 2021-06-03 11:26
+ **/
+
+public class CliController {
+    File inputfile;
+    File outputfile;
+    String allResult;
+    Map<String, String> multipleResult = new HashMap<>();
+    Map<Integer, HashSet<String>> originalResult;
+
+    /**
+     * Splits the input file and writes the merged levels to the output file.
+     *
+     * @param in  path of the input text file, one URL per line
+     * @param out path of the file the result is written to
+     */
+    public void run(String in, String out) {
+        fileSplit(in, out);
+        if (originalResult == null) {
+            // fileSplit has already reported the problem; there is nothing to write.
+            return;
+        }
+        paraPocessing(originalResult, false);
+        FileUtils.outPutFile(outputfile, allResult, "UTF-8");
+        System.out.println("任务完成,输出目录为: " + outputfile.getAbsolutePath());
+    }
+
+    /**
+     * Resolves both paths and stores the split result in {@code originalResult}.
+     * On failure a message is printed and {@code originalResult} stays {@code null}.
+     *
+     * @param in     path of the input text file
+     * @param output path of the output text file
+     */
+    public void fileSplit(String in, String output) {
+        originalResult = null;
+        try {
+            inputfile = new File(in);
+            outputfile = new File(output);
+            if (!inputfile.isFile()) {
+                // An unreadable input is otherwise seen as "no lines" and yields an empty report.
+                System.out.println("输入有误,请检查文件路径");
+                return;
+            }
+            originalResult = DomainSplit.domainSplit(inputfile);
+        } catch (Exception e) {
+            System.out.println("输入有误,请检查文件路径");
+        }
+    }
+
+    /**
+     * Checks that the two option flags are {@code -in} and {@code -out}; prints the usage otherwise.
+     *
+     * @param in  first option flag
+     * @param out second option flag
+     * @return {@code true} when both flags match
+     */
+    public Boolean isReady(String in, String out) {
+        // Constant-first equals keeps a null flag from throwing.
+        if ("-in".equals(in) && "-out".equals(out)) {
+            return true;
+        }
+        waring();
+        return false;
+    }
+
+    /** Prints the command-line usage. */
+    public void waring() {
+        System.out.println("语法参考: java -jar DomainSplit.jar -in [dir/input.txt] -out [dir/output.txt]");
+    }
+
+    /**
+     * Turns the level map into text.
+     * Levels are read from keys {@code 0 .. size-2}; key {@code -1} is not exported.
+     *
+     * @param result       map from level index to the URLs of that level
+     * @param multipleFile {@code true} to fill {@code multipleResult} with one entry per level,
+     *                     {@code false} to merge all levels into {@code allResult}
+     */
+    private void paraPocessing(Map<Integer, HashSet<String>> result, boolean multipleFile) {
+        int levels = result.size() - 1;
+        if (multipleFile) {
+            for (int i = 0; i < levels; i++) {
+                multipleResult.put("level-" + i + ".txt", joinLines(result.get(i)) + "\n");
+            }
+        } else {
+            StringBuilder sb = new StringBuilder();
+            for (int i = 0; i < levels; i++) {
+                sb.append(joinLines(result.get(i)));
+            }
+            allResult = sb.toString();
+        }
+    }
+
+    /** Joins the entries one per line; a missing level yields an empty string. */
+    private static String joinLines(HashSet<String> entries) {
+        StringBuilder sb = new StringBuilder();
+        if (entries != null) {
+            for (String entry : entries) {
+                sb.append(entry).append("\n");
+            }
+        }
+        return sb.toString();
+    }
+}
diff --git a/src/main/java/ffffffff0x/domainSplit/Main/Main.java b/src/main/java/ffffffff0x/domainSplit/Main/Main.java
new file mode 100644
index 0000000..888e838
--- /dev/null
+++ b/src/main/java/ffffffff0x/domainSplit/Main/Main.java
@@ -0,0 +1,26 @@
+package ffffffff0x.domainSplit.Main;
+
+/**
+ * Program entry point. Usage: {@code -in [input file] -out [output file]}.
+ *
+ * @author: RyuZUSUNC
+ * @create: 2021-06-03 10:34
+ **/
+
+public class Main {
+    public static void main(String[] args) {
+        CliController cliController = new CliController();
+        // Both flags and both paths are required; check up front instead of relying on an exception.
+        if (args.length < 4) {
+            cliController.waring();
+            return;
+        }
+        try {
+            if (cliController.isReady(args[0], args[2])) {
+                cliController.run(args[1], args[3]);
+            }
+        } catch (Exception e) {
+            cliController.waring();
+        }
+    }
+}
diff --git a/src/main/java/ffffffff0x/domainSplit/impl/DomainSplit.java b/src/main/java/ffffffff0x/domainSplit/impl/DomainSplit.java
new file mode 100644
index 0000000..f71071f
--- /dev/null
+++ b/src/main/java/ffffffff0x/domainSplit/impl/DomainSplit.java
@@ -0,0 +1,132 @@
+package ffffffff0x.domainSplit.impl;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Splits URLs into the chain of directory URLs that lead to them.
+ *
+ * @author: RyuZUSUNC
+ * @create: 2021-05-09 15:03
+ **/
+
+public class DomainSplit {
+    private static final String SCHEME_SEPARATOR = "://";
+
+    /**
+     * Splits every URL of the input into its directory levels.
+     * Blank lines are skipped and a URL without any path counts as a directory.
+     *
+     * @param object a {@code File} to read line by line, or a {@code String} with one URL per line
+     * @return map from level index (0 = host) to the de-duplicated URLs of that level;
+     *         key -1 holds the original URLs whose last segment is not followed by "/"
+     */
+    public static Map<Integer, HashSet<String>> domainSplit(Object object) {
+        ArrayList<String[]> allURL = new ArrayList<>();
+        HashSet<String> notDirURL = new LinkedHashSet<>();
+
+        for (String line : FileUtils.readLine(object)) {
+            String domain = line == null ? "" : line.trim();
+            String hostAndPath = stripScheme(domain);
+            if (hostAndPath.isEmpty()) {
+                // Blank line, or a scheme with nothing after it: nothing to split.
+                continue;
+            }
+            if (!hostAndPath.contains("/")) {
+                // Bare host such as "http://a.com": there is no last segment to cut off.
+                allURL.add(split(domain));
+            } else if (regexStringNum(domain, "/") == domain.split("/").length) {
+                // split() drops the empty piece after a trailing "/", so equal counts mean
+                // the URL ends with "/" and is a directory itself.
+                allURL.add(split(domain));
+            } else {
+                // The last segment is treated as a file: keep the URL and split its parent.
+                notDirURL.add(domain);
+                allURL.add(split(domain.substring(0, domain.lastIndexOf("/"))));
+            }
+        }
+
+        Map<Integer, HashSet<String>> result = sortingDomain(allURL);
+        result.put(-1, notDirURL);
+        return result;
+    }
+
+    /**
+     * Returns the part of the URL after the first "://", or the URL itself when it has none.
+     */
+    private static String stripScheme(String url) {
+        int schemeEnd = url.indexOf(SCHEME_SEPARATOR);
+        return schemeEnd < 0 ? url : url.substring(schemeEnd + SCHEME_SEPARATOR.length());
+    }
+
+    /**
+     * Splits one URL into all of its directory prefixes.
+     *
+     * @param domain URL with or without a scheme
+     * @return one entry per "/"-separated segment, each being the scheme plus all
+     *         segments up to that one, terminated by "/"
+     */
+    private static String[] split(String domain) {
+        String rest = stripScheme(domain);
+        // Only the first "://" is the scheme; a later one (e.g. in a query string) stays in the path.
+        String protocol = domain.substring(0, domain.length() - rest.length());
+        String[] segments = rest.split("/");
+        String[] levels = new String[segments.length];
+
+        // Grow the prefix by one segment per step and record every intermediate value.
+        StringBuilder prefix = new StringBuilder(protocol);
+        for (int i = 0; i < segments.length; i++) {
+            prefix.append(segments[i]).append("/");
+            levels[i] = prefix.toString();
+        }
+        return levels;
+    }
+
+    /**
+     * Counts how often a pattern occurs in a string.
+     *
+     * @param targetStr  string to search
+     * @param patternStr regular expression to count
+     * @return number of matches
+     */
+    private static int regexStringNum(String targetStr, String patternStr) {
+        Matcher matcher = Pattern.compile(patternStr).matcher(targetStr);
+        int count = 0;
+        while (matcher.find()) {
+            count++;
+        }
+        return count;
+    }
+
+    /**
+     * Groups the split URLs by level and removes duplicates.
+     *
+     * @param arrayList directory prefixes of every URL, as produced by {@code split}
+     * @return map from level index to the distinct URLs found at that level; empty for empty input
+     */
+    private static Map<Integer, HashSet<String>> sortingDomain(ArrayList<String[]> arrayList) {
+        // The deepest URL decides how many levels exist.
+        int maxDepth = 0;
+        for (String[] levels : arrayList) {
+            maxDepth = Math.max(maxDepth, levels.length);
+        }
+
+        Map<Integer, HashSet<String>> result = new HashMap<>();
+        for (int i = 0; i < maxDepth; i++) {
+            // LinkedHashSet removes duplicates and keeps the input order.
+            HashSet<String> hashSet = new LinkedHashSet<>();
+            for (String[] list : arrayList) {
+                if (list.length > i) {
+                    hashSet.add(list[i]);
+                }
+            }
+            result.put(i, hashSet);
+        }
+        return result;
+    }
+}
diff --git a/src/main/java/ffffffff0x/domainSplit/impl/FileUtils.java b/src/main/java/ffffffff0x/domainSplit/impl/FileUtils.java
new file mode 100644
index 0000000..1ab10cd
--- /dev/null
+++ b/src/main/java/ffffffff0x/domainSplit/impl/FileUtils.java
@@ -0,0 +1,95 @@
+package ffffffff0x.domainSplit.impl;
+
+import java.awt.*;
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Map;
+
+public class FileUtils {
+    /**
+     * Reads a whole file into a byte array.
+     *
+     * @param file file to read
+     * @return the file content, or {@code null} when the file cannot be read
+     */
+    public static byte[] getFilebyte(File file) {
+        try {
+            // Reads the complete content and closes the file, unlike a single read() call.
+            return Files.readAllBytes(file.toPath());
+        } catch (IOException e) {
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    /**
+     * Reads a text file line by line, decoding it as UTF-8.
+     *
+     * @param file file to read
+     * @return the lines read; empty when the file cannot be read
+     */
+    public static ArrayList<String> getFileLines(File file) {
+        ArrayList<String> result = new ArrayList<>();
+        try (InputStream inputStream = new FileInputStream(file);
+             BufferedReader bufferedReader = new BufferedReader(
+                     new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
+            String str;
+            while ((str = bufferedReader.readLine()) != null) {
+                result.add(str);
+            }
+        } catch (IOException e) {
+            // Keep the best-effort contract (empty list) but do not hide the reason.
+            System.err.println("Cannot read " + file + ": " + e.getMessage());
+        }
+        // Drop a leading byte-order mark so it does not become part of the first line.
+        if (!result.isEmpty() && result.get(0).startsWith("\uFEFF")) {
+            result.set(0, result.get(0).substring(1));
+        }
+        return result;
+    }
+
+    /**
+     * Writes text to a file, creating missing parent directories first.
+     *
+     * @param file    target file; nothing is written when it is {@code null}
+     * @param out     text to write; {@code null} is written as an empty file
+     * @param charset name of the charset used to encode the text
+     */
+    public static void outPutFile(File file, String out, String charset) {
+        if (file == null) {
+            return;
+        }
+        // A path without a directory part has no parent; there is nothing to create then.
+        File parent = file.getParentFile();
+        if (!file.exists() && parent != null) {
+            parent.mkdirs();
+        }
+        try (OutputStream outputStream = new FileOutputStream(file);
+             Writer writer = new OutputStreamWriter(outputStream, charset)) {
+            writer.write(out == null ? "" : out);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Returns the lines of a file or of a string.
+     *
+     * @param object a {@code File} to read, or a {@code String} to split at line breaks
+     * @return the lines; empty when {@code object} is {@code null}
+     */
+    public static ArrayList<String> readLine(Object object) {
+        if (object instanceof File) {
+            return FileUtils.getFileLines((File) object);
+        }
+        ArrayList<String> list = new ArrayList<>();
+        if (object != null) {
+            // Accept both LF and CRLF so Windows text leaves no stray carriage return on a line.
+            Collections.addAll(list, ((String) object).split("\r?\n"));
+        }
+        return list;
+    }
+}