Initial commit

ffffffff0x · Jun 3, 2021 · d935d41 · d935d41
commit d935d41
Show file tree

Hide file tree

Showing 10 changed files with 375 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+# Project exclude paths
+/target/
diff --git a/DomainSplit.iml b/DomainSplit.iml
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4" />
diff --git a/README.md b/README.md
@@ -0,0 +1,13 @@
+# 简述
+一个从 [BE](https://github.com/ffffffff0x/BerylEnigma) 中独立出来的轮子，可以快速分割出 URL 链接的每一级路径。
+
+# 使用方法
+
+```bash
+java -jar DomainSplit-1.0.jar -in [dir/text.txt] -out [dir/text.txt]
+```
+
+# 效果
+
+![](./assets/img/test1.png)
+![](./assets/img/test2.png)
diff --git a/assets/img/test1.png b/assets/img/test1.png
diff --git a/assets/img/test2.png b/assets/img/test2.png
diff --git a/pom.xml b/pom.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>org.ffffffff0x</groupId>
+    <artifactId>DomainSplit</artifactId>
+    <version>1.0</version>
+
+    <properties>
+        <maven.compiler.source>8</maven.compiler.source>
+        <maven.compiler.target>8</maven.compiler.target>
+    </properties>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.1</version>
+                <configuration>
+                    <source>1.8</source>
+                    <target>1.8</target>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>1.2.1</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <transformers>
+                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                                    <mainClass>ffffffff0x.domainSplit.Main.Main</mainClass>
+                                </transformer>
+                            </transformers>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/src/main/java/ffffffff0x/domainSplit/Main/CliController.java b/src/main/java/ffffffff0x/domainSplit/Main/CliController.java
@@ -0,0 +1,79 @@
+package ffffffff0x.domainSplit.Main;
+
+import ffffffff0x.domainSplit.impl.DomainSplit;
+import ffffffff0x.domainSplit.impl.FileUtils;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+/**
+ * Command-line workflow: loads the input file, splits every URL into its
+ * per-level prefixes through {@link DomainSplit}, and writes the levels to the
+ * output file.
+ *
+ * @author: RyuZUSUNC
+ * @create: 2021-06-03 11:26
+ **/
+public class CliController {
+    /** Index of the first directory level that is written out. */
+    private static final int FIRST_LEVEL = 0;
+
+    File inputfile;
+    File outputfile;
+    String allResult;
+    Map<String,String> multipleResult = new HashMap<>();
+    Map<Integer, HashSet<String>> originalResult;
+
+    /**
+     * Runs the whole job: split the input, flatten the levels, write the output.
+     * Stops after the error message printed by {@link #fileSplit} when the
+     * input could not be processed, instead of failing later on a null result.
+     *
+     * @param in  path of the input file, one URL per line
+     * @param out path of the file the result is written to
+     */
+    public void run(String in,String out){
+        fileSplit(in,out);
+        if (originalResult == null) {
+            // fileSplit has already told the user what went wrong.
+            return;
+        }
+        paraPocessing(originalResult,false);
+        FileUtils.outPutFile(outputfile,allResult,"UTF-8");
+        System.out.println("任务完成，输出目录为: " + outputfile.getAbsolutePath());
+    }
+
+    /**
+     * Resolves both paths and stores the split result in {@code originalResult}.
+     * On any failure a message is printed and {@code originalResult} is left
+     * {@code null}.
+     *
+     * @param in     path of the input file
+     * @param output path of the output file
+     */
+    public void fileSplit(String in,String output){
+        // Clear the previous outcome so a failed call never exposes stale data.
+        originalResult = null;
+        try{
+            inputfile = new File(in);
+            outputfile = new File(output);
+            originalResult = DomainSplit.domainSplit(inputfile);
+        }catch (Exception e){
+            System.out.println("输入有误，请检查文件路径");
+        }
+    }
+
+    /**
+     * Checks that the two option flags are exactly {@code -in} and {@code -out};
+     * prints the usage line otherwise.
+     *
+     * @param in  the argument expected to be {@code -in}
+     * @param out the argument expected to be {@code -out}
+     * @return {@code true} when both flags match
+     */
+    public Boolean isReady(String in,String out){
+        // Constant-first equals keeps a null argument from throwing.
+        if ("-in".equals(in) && "-out".equals(out)){
+            return true;
+        }
+        waring();
+        return false;
+    }
+
+    /** Prints the command-line usage line. */
+    public void waring(){
+        System.out.println("语法参考: java -jar DomainSplit.jar -in [dir/input.txt] -out [dir/output.txt]");
+    }
+
+    /**
+     * Flattens the per-level sets into text.
+     *
+     * @param result       level index to URLs of that level; the map also holds
+     *                     one extra entry under key -1, which is why the loops
+     *                     stop at {@code size() - 1}
+     * @param multipleFile {@code true} to fill {@code multipleResult} with one
+     *                     text per level, {@code false} to concatenate all
+     *                     levels into {@code allResult}
+     */
+    private void paraPocessing(Map<Integer, HashSet<String>> result, boolean multipleFile){
+        int levelCount = result.size() - 1;
+
+        if(multipleFile){
+            for (int i = FIRST_LEVEL; i < levelCount; i++) {
+                StringBuilder sb = new StringBuilder();
+                appendLevel(sb, result.get(i));
+                sb.append("\n");
+                // The -1 name is only reachable if FIRST_LEVEL is lowered to -1.
+                String fileName = (i == -1) ? "fileURLs.txt" : "level-" + i +".txt";
+                multipleResult.put(fileName, sb.toString());
+            }
+        }else {
+            StringBuilder sb = new StringBuilder();
+            for (int i = FIRST_LEVEL; i < levelCount; i++) {
+                appendLevel(sb, result.get(i));
+            }
+            allResult = sb.toString();
+        }
+    }
+
+    /** Appends every URL of one level, one per line; a missing level adds nothing. */
+    private static void appendLevel(StringBuilder sb, HashSet<String> level){
+        if (level == null) {
+            return;
+        }
+        for (String url : level) {
+            sb.append(url).append("\n");
+        }
+    }
+}
diff --git a/src/main/java/ffffffff0x/domainSplit/Main/Main.java b/src/main/java/ffffffff0x/domainSplit/Main/Main.java
@@ -0,0 +1,22 @@
+package ffffffff0x.domainSplit.Main;
+
+import sun.awt.windows.WPrinterJob;
+
+/**
+ * Program entry point. Expected arguments:
+ * {@code -in <input file> -out <output file>}.
+ *
+ * @author: RyuZUSUNC
+ * @create: 2021-06-03 10:34
+ **/
+public class Main {
+    /**
+     * Validates the argument list and hands the two paths to
+     * {@link CliController#run}.
+     *
+     * @param args command-line arguments; at least four are required
+     */
+    public static void main(String[] args) {
+        CliController cliController = new CliController();
+
+        // Check the argument count up front instead of relying on an
+        // ArrayIndexOutOfBoundsException to detect a short command line.
+        if (args.length < 4) {
+            cliController.waring();
+            return;
+        }
+
+        try {
+            if (cliController.isReady(args[0],args[2])){
+                cliController.run(args[1],args[3]);
+            }
+        }catch (Exception e){
+            // Report the actual cause so a runtime failure is not mistaken
+            // for a syntax error; the usage line is still shown as before.
+            System.err.println("Error: " + e);
+            cliController.waring();
+        }
+    }
+}
diff --git a/src/main/java/ffffffff0x/domainSplit/impl/DomainSplit.java b/src/main/java/ffffffff0x/domainSplit/impl/DomainSplit.java
@@ -0,0 +1,119 @@
+package ffffffff0x.domainSplit.impl;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Splits URLs into the cumulative prefixes of their path, grouped by depth.
+ * For example {@code http://a.com/b/c.txt} contributes {@code http://a.com/}
+ * to level 0 and {@code http://a.com/b/} to level 1.
+ *
+ * @author: RyuZUSUNC
+ * @create: 2021-05-09 15:03
+ **/
+public class DomainSplit {
+    /**
+     * Splits every URL found in the input.
+     *
+     * @param object a {@link java.io.File} to read line by line, or a
+     *               {@code String} holding one URL per line
+     * @return map from level index (0 = host) to the distinct prefixes of that
+     *         level; key -1 holds the original URLs that do not end in a
+     *         directory
+     * @throws IllegalArgumentException if {@code object} is a file that does
+     *         not exist or is not a regular file
+     */
+    public static Map<Integer, HashSet<String>> domainSplit(Object object){
+        // Fail loudly on a missing file: FileUtils would report it as an empty list.
+        if (object instanceof java.io.File && !((java.io.File) object).isFile()) {
+            throw new IllegalArgumentException("input file not found: " + object);
+        }
+
+        ArrayList<String[]> allURL = new ArrayList<>();
+        HashSet<String> notDirURL = new HashSet<>();
+
+        for (String rawLine : FileUtils.readLine(object)) {
+            // Trimming also drops the '\r' left behind by CRLF line endings.
+            String domain = rawLine.trim();
+            if (domain.isEmpty()) {
+                continue;
+            }
+
+            int schemeEnd = domain.indexOf("://");
+            int hostStart = schemeEnd < 0 ? 0 : schemeEnd + 3;
+            if (hostStart >= domain.length()) {
+                // Nothing follows the scheme, so there is nothing to split.
+                continue;
+            }
+
+            int lastSlash = domain.lastIndexOf('/');
+            if (lastSlash < hostStart) {
+                // Bare host without any path: it is its own level 0.
+                allURL.add(split(domain));
+            } else if (countSlashes(domain) == domain.split("/").length) {
+                // split() drops the trailing empty piece, so the counts match
+                // exactly when the URL ends with a single "/": a directory.
+                allURL.add(split(domain));
+            } else {
+                // The last segment is a file: remember the URL and split its directory.
+                notDirURL.add(domain);
+                allURL.add(split(domain.substring(0, lastSlash)));
+            }
+        }
+
+        Map<Integer, HashSet<String>> result = sortingDomain(allURL);
+        result.put(-1,notDirURL);
+        return result;
+    }
+
+    /**
+     * Builds the cumulative directory prefixes of a single URL.
+     *
+     * @param domain URL with or without a {@code scheme://} prefix
+     * @return one entry per path segment, each ending in "/" and carrying the
+     *         scheme when there is one
+     */
+    private static String[] split(String domain){
+        String protocol = "";
+
+        // Cut at the first "://" only, so a URL embedded later in the path
+        // (for example in a query string) is kept instead of being truncated.
+        int schemeEnd = domain.indexOf("://");
+        if (schemeEnd >= 0) {
+            protocol = domain.substring(0, schemeEnd + 3);
+            domain = domain.substring(schemeEnd + 3);
+        }
+
+        String[] segments = domain.split("/");
+        String[] levels = new String[segments.length];
+        StringBuilder prefix = new StringBuilder(protocol);
+        for (int i = 0; i < segments.length; i++) {
+            prefix.append(segments[i]).append('/');
+            levels[i] = prefix.toString();
+        }
+        return levels;
+    }
+
+    /**
+     * Counts the "/" characters in a string.
+     *
+     * @param text string to scan
+     * @return number of occurrences of '/'
+     */
+    private static int countSlashes(String text) {
+        int count = 0;
+        for (int i = 0; i < text.length(); i++) {
+            if (text.charAt(i) == '/') {
+                count++;
+            }
+        }
+        return count;
+    }
+
+    /**
+     * Groups the prefixes by depth and removes duplicates.
+     *
+     * @param arrayList prefix arrays, one per URL
+     * @return map from level index to the distinct prefixes at that depth;
+     *         empty when no URL was supplied
+     */
+    private static Map<Integer, HashSet<String>> sortingDomain(ArrayList<String[]> arrayList){
+        // Only the deepest URL matters for the number of levels.
+        int maxDepth = 0;
+        for (String[] levels : arrayList) {
+            maxDepth = Math.max(maxDepth, levels.length);
+        }
+
+        Map<Integer,HashSet<String>> result = new HashMap<>();
+        for (int i = 0; i < maxDepth; i++) {
+            HashSet<String> hashSet = new HashSet<>();
+            for (String[] levels : arrayList) {
+                if(levels.length > i){
+                    hashSet.add(levels[i]);
+                }
+            }
+            result.put(i,hashSet);
+        }
+        return result;
+    }
+}
diff --git a/src/main/java/ffffffff0x/domainSplit/impl/FileUtils.java b/src/main/java/ffffffff0x/domainSplit/impl/FileUtils.java
@@ -0,0 +1,90 @@
+package ffffffff0x.domainSplit.impl;
+
+import java.awt.*;
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Map;
+
+public class FileUtils {
+    /**
+     * 获取文件的byte数组格式
+     * @param file
+     * @return
+     */
+    public static byte[] getFilebyte(File file){
+        FileInputStream fileInputStream;
+        byte[] result = null;
+        try {
+            fileInputStream = new FileInputStream(file);
+            result = new byte[fileInputStream.available()];
+            fileInputStream.read(result);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+        return result;
+    }
+
+    /**
+     * 按行获取文本
+     * @param file
+     * @return
+     */
+    public static ArrayList<String> getFileLines(File file){
+        ArrayList<String> result = new ArrayList<>();
+        try {
+            FileInputStream inputStream = new FileInputStream(file);
+            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
+
+            String str;
+            while((str = bufferedReader.readLine()) != null)
+            {
+               result.add(str);
+            }
+            inputStream.close();
+            bufferedReader.close();
+        } catch (IOException e) {
+//            e.printStackTrace();
+        }
+        return result;
+    }
+
+    /**
+     * 保存文本格式文件至存储
+     * @param out
+     * @param charset
+     */
+    public static void outPutFile(File file,String out,String charset){
+        if (!file.exists()) {
+            file.getParentFile().mkdirs();// 目录不存在的情况下，创建目录。
+        }
+        if(file!=null) {
+            try {
+                OutputStreamWriter OSW = new OutputStreamWriter(new FileOutputStream(file), charset);
+                OSW.write(out);
+                OSW.flush();
+                OSW.close();
+//                Desktop.getDesktop().open(file);
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+    /**
+     * 返回载入文件/字符串的按行分割后的ArrayList<String>
+     * @param object
+     * @return
+     */
+    public static ArrayList<String> readLine(Object object){
+        if(object instanceof File){
+            return FileUtils.getFileLines((File)object);
+        }else {
+            String text = (String)object;
+            ArrayList<String> list = new ArrayList<>();
+            //把数组转成集合，也就是把数组里面的数据存进集合；
+            Collections.addAll(list, text.split("\n"));
+            return list;
+        }
+    }
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		<?xml version="1.0" encoding="UTF-8"?>
		<module type="JAVA_MODULE" version="4" />