Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
RyuZUSUNC committed Jun 3, 2021
0 parents commit d935d41
Show file tree
Hide file tree
Showing 10 changed files with 375 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Project exclude paths
/target/
2 changes: 2 additions & 0 deletions DomainSplit.iml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4" />
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# 简述
一个从 [BE](https://github.com/ffffffff0x/BerylEnigma) 中独立出来的轮子,可以快速分割每一级的 URL 链接。

# 使用方法

```bash
java -jar DomainSplit-1.0.jar -in [dir/input.txt] -out [dir/output.txt]
```

# 效果

![](./assets/img/test1.png)
![](./assets/img/test2.png)
Binary file added assets/img/test1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/img/test2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
48 changes: 48 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>org.ffffffff0x</groupId>
<artifactId>DomainSplit</artifactId>
<version>1.0</version>

<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>1.2.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>ffffffff0x.domainSplit.Main.Main</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
79 changes: 79 additions & 0 deletions src/main/java/ffffffff0x/domainSplit/Main/CliController.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package ffffffff0x.domainSplit.Main;

import ffffffff0x.domainSplit.impl.DomainSplit;
import ffffffff0x.domainSplit.impl.FileUtils;

import java.io.File;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

/**
* @author: RyuZUSUNC
* @create: 2021-06-03 11:26
**/

/**
 * Command-line workflow: reads URLs from an input file, splits them into
 * per-level directory URLs via {@link DomainSplit} and writes the combined
 * result to an output file via {@link FileUtils}.
 */
public class CliController {
    /** Input file; assigned by {@link #fileSplit(String, String)}. */
    File inputfile;
    /** Output file; assigned by {@link #fileSplit(String, String)}. */
    File outputfile;
    /** All levels concatenated, one URL per line (single-file mode). */
    String allResult;
    /** File name ("level-N.txt") mapped to that level's content (multi-file mode). */
    Map<String,String> multipleResult = new HashMap<>();
    /** Raw split result, or {@code null} when the last {@link #fileSplit} call failed. */
    Map<Integer, HashSet<String>> originalResult;

    /**
     * Runs the whole job: split the input file, merge all levels and write
     * them to the output file as UTF-8.
     *
     * @param in  path of the input file
     * @param out path of the output file
     */
    public void run(String in,String out){
        fileSplit(in,out);
        if (originalResult == null) {
            // fileSplit has already told the user what went wrong; continuing
            // would only dereference the missing result.
            return;
        }
        paraPocessing(originalResult,false);
        FileUtils.outPutFile(outputfile,allResult,"UTF-8");
        System.out.println("任务完成,输出目录为: " + outputfile.getAbsolutePath());
    }

    /**
     * Resolves both paths and splits the input file. On any failure a hint is
     * printed and {@link #originalResult} is left {@code null}.
     *
     * @param in     path of the input file
     * @param output path of the output file
     */
    public void fileSplit(String in,String output){
        // Never let a result from an earlier call survive a failed one.
        originalResult = null;
        try{
            inputfile = new File(in);
            outputfile = new File(output);
            originalResult = DomainSplit.domainSplit(inputfile);
        }catch (Exception e){
            System.out.println("输入有误,请检查文件路径");
        }
    }

    /**
     * Checks that the two option flags are exactly {@code -in} and {@code -out};
     * prints the usage line otherwise.
     *
     * @param in  the argument expected to be {@code -in}
     * @param out the argument expected to be {@code -out}
     * @return {@code true} when both flags match
     */
    public Boolean isReady(String in,String out){
        // Constant-first comparison so a null argument counts as "not ready".
        if ("-in".equals(in) && "-out".equals(out)){
            return true;
        }
        waring();
        return false;
    }

    /** Prints the command-line usage line. */
    public void waring(){
        System.out.println("语法参考: java -jar DomainSplit.jar -in [dir/input.txt] -out [dir/output.txt]");
    }

    /**
     * Renders the split result as text.
     *
     * <p>Levels are read from keys 0, 1, 2, ... for as long as they exist. The
     * set stored under key -1 is not emitted here (the original loop also
     * started at level 0).
     *
     * @param result       level index mapped to the URLs of that level
     * @param multipleFile {@code true} to fill {@link #multipleResult} with one
     *                     entry per level, {@code false} to fill {@link #allResult}
     */
    private void paraPocessing(Map<Integer, HashSet<String>> result, boolean multipleFile){
        if (multipleFile) {
            for (int level = 0; result.containsKey(level); level++) {
                StringBuilder content = new StringBuilder();
                for (String url : result.get(level)) {
                    content.append(url).append("\n");
                }
                content.append("\n");
                multipleResult.put("level-" + level + ".txt", content.toString());
            }
            return;
        }

        StringBuilder combined = new StringBuilder();
        for (int level = 0; result.containsKey(level); level++) {
            for (String url : result.get(level)) {
                combined.append(url).append("\n");
            }
        }
        allResult = combined.toString();
    }
}
22 changes: 22 additions & 0 deletions src/main/java/ffffffff0x/domainSplit/Main/Main.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package ffffffff0x.domainSplit.Main;

import sun.awt.windows.WPrinterJob;

/**
* @author: RyuZUSUNC
* @create: 2021-06-03 10:34
**/

/**
 * Program entry point. Expects exactly the form
 * {@code -in <input file> -out <output file>} and prints the usage line for
 * anything else.
 */
public class Main {
    public static void main(String[] args) {
        CliController controller = new CliController();
        try {
            // Fewer than four arguments can never form "-in X -out Y".
            if (args.length < 4) {
                controller.waring();
                return;
            }
            boolean flagsOk = controller.isReady(args[0], args[2]);
            if (flagsOk) {
                controller.run(args[1], args[3]);
            }
        } catch (Exception e) {
            // Any failure while running is reported as a usage hint.
            controller.waring();
        }
    }
}
119 changes: 119 additions & 0 deletions src/main/java/ffffffff0x/domainSplit/impl/DomainSplit.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
package ffffffff0x.domainSplit.impl;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* @author: RyuZUSUNC
* @create: 2021-05-09 15:03
**/

/**
 * Splits URLs into the chain of directory URLs leading to them and groups
 * those by depth.
 */
public class DomainSplit {
    /** Marker that separates the protocol from the rest of a URL. */
    private static final String PROTOCOL_SEPARATOR = "://";

    /**
     * Splits every URL of the input into its directory levels.
     *
     * <p>Lines are trimmed; lines that are empty (or consist of a protocol
     * only) are skipped. A URL that does not end in a single "/" and has a
     * path is recorded as a file URL and only its parent directory is split.
     * A bare host such as {@code a.com} or {@code http://a.com} is treated as
     * its own top-level directory.
     *
     * @param object a {@code File} or a {@code String} of newline-separated
     *               URLs, as accepted by {@code FileUtils.readLine}
     * @return level index (0 = shortest prefix) mapped to the de-duplicated
     *         directory URLs of that level, plus key -1 holding the file URLs
     * @throws IllegalArgumentException if the input contains no usable URL
     */
    public static Map<Integer, HashSet<String>> domainSplit(Object object){
        ArrayList<String[]> allURL = new ArrayList<>();
        HashSet<String> notDirURL = new HashSet<>();

        for (String line : FileUtils.readLine(object)) {
            String domain = line.trim();
            String hostAndPath = stripProtocol(domain);
            if (hostAndPath.isEmpty()) {
                // Blank line or protocol without a host: nothing to split.
                continue;
            }
            if (!hostAndPath.contains("/")) {
                // Bare host: there is no "/" to cut at, so taking the parent
                // would fail (or cut into the protocol's own slashes).
                allURL.add(split(domain));
            } else if (regexStringNum(domain,"/") == domain.split("/").length) {
                // split() drops the trailing empty piece, so the counts match
                // exactly when the URL ends with one "/" (a directory).
                allURL.add(split(domain));
            } else {
                notDirURL.add(domain);
                allURL.add(split(domain.substring(0, domain.lastIndexOf("/"))));
            }
        }

        if (allURL.isEmpty()) {
            throw new IllegalArgumentException("input contains no URL to split");
        }

        Map<Integer, HashSet<String>> result = sortingDomain(allURL);
        result.put(-1, notDirURL);
        return result;
    }

    /**
     * Returns the part of a URL after the first "://", or the URL itself when
     * it has no protocol.
     *
     * @param url the URL to inspect
     * @return the URL without its protocol prefix
     */
    private static String stripProtocol(String url){
        int separatorIndex = url.indexOf(PROTOCOL_SEPARATOR);
        if (separatorIndex < 0) {
            return url;
        }
        return url.substring(separatorIndex + PROTOCOL_SEPARATOR.length());
    }

    /**
     * Builds every directory prefix of a single URL, e.g.
     * {@code http://a.com/b} gives {@code http://a.com/} and
     * {@code http://a.com/b/}.
     *
     * @param domain the URL to split
     * @return the prefixes ordered from shortest to longest, each ending in "/"
     */
    private static String[] split(String domain){
        String protocol = "";
        // Cut at the FIRST "://" only, so a later "://" (for example inside a
        // query string) stays part of the path instead of truncating it.
        int separatorIndex = domain.indexOf(PROTOCOL_SEPARATOR);
        if (separatorIndex >= 0) {
            int restStart = separatorIndex + PROTOCOL_SEPARATOR.length();
            protocol = domain.substring(0, restStart);
            domain = domain.substring(restStart);
        }

        String[] segments = domain.split("/");
        String[] levels = new String[segments.length];
        // Grows by one segment per iteration.
        StringBuilder prefix = new StringBuilder(protocol);
        for (int i = 0; i < segments.length; i++) {
            prefix.append(segments[i]).append("/");
            levels[i] = prefix.toString();
        }
        return levels;
    }

    /**
     * Counts how many times a regular expression matches in a string; used to
     * count the "/" characters of a URL.
     *
     * @param targetStr  the string to search
     * @param patternStr the regular expression to look for
     * @return the number of matches
     */
    private static int regexStringNum(String targetStr, String patternStr) {
        Matcher matcher = Pattern.compile(patternStr).matcher(targetStr);
        int count = 0;
        while (matcher.find()) {
            count++;
        }
        return count;
    }

    /**
     * Regroups the per-URL prefix arrays by depth and removes duplicates.
     *
     * @param arrayList one prefix array per URL, as produced by {@link #split}
     * @return level index mapped to the set of prefixes found at that index;
     *         empty when the list is empty
     */
    private static Map<Integer, HashSet<String>> sortingDomain(ArrayList<String[]> arrayList){
        // Only the deepest URL matters for the number of levels.
        int maxDepth = 0;
        for (String[] levels : arrayList) {
            maxDepth = Math.max(maxDepth, levels.length);
        }

        Map<Integer,HashSet<String>> result = new HashMap<>();
        for (int level = 0; level < maxDepth; level++) {
            HashSet<String> urlsAtLevel = new HashSet<>();
            for (String[] levels : arrayList) {
                if (levels.length > level) {
                    urlsAtLevel.add(levels[level]);
                }
            }
            result.put(level, urlsAtLevel);
        }
        return result;
    }
}
90 changes: 90 additions & 0 deletions src/main/java/ffffffff0x/domainSplit/impl/FileUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package ffffffff0x.domainSplit.impl;

import java.awt.*;
import java.io.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Map;

/**
 * Small file helpers: read a file as bytes or lines, write text to a file,
 * and turn a {@code File} or {@code String} into a list of lines.
 */
public class FileUtils {
    /**
     * Reads the whole file into a byte array.
     *
     * @param file the file to read
     * @return the file content, or {@code null} if reading failed (the stack
     *         trace is printed in that case)
     */
    public static byte[] getFilebyte(File file){
        try (FileInputStream in = new FileInputStream(file);
             ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
            // available() is only an estimate and one read() may return fewer
            // bytes than requested, so copy until end of stream.
            byte[] chunk = new byte[8192];
            int read;
            while ((read = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toByteArray();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads a text file line by line, decoding with the platform default
     * charset.
     *
     * @param file the file to read
     * @return the lines read; empty (or partial) when the file cannot be read
     */
    public static ArrayList<String> getFileLines(File file){
        ArrayList<String> result = new ArrayList<>();
        try (FileInputStream inputStream = new FileInputStream(file);
             BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream))) {
            String str;
            while ((str = bufferedReader.readLine()) != null) {
                result.add(str);
            }
        } catch (IOException ignored) {
            // Best effort by design: an unreadable file yields whatever was
            // read so far, and callers decide how to report it.
        }
        return result;
    }

    /**
     * Writes text to a file, creating missing parent directories first.
     * Does nothing when {@code file} is {@code null}; other failures are
     * printed and not propagated.
     *
     * @param file    the target file
     * @param out     the text to write
     * @param charset name of the charset used to encode the text
     */
    public static void outPutFile(File file,String out,String charset){
        if (file == null) {
            return;
        }
        // A bare file name such as "out.txt" has no parent to create.
        File parent = file.getParentFile();
        if (!file.exists() && parent != null) {
            parent.mkdirs();
        }
        try (FileOutputStream fileStream = new FileOutputStream(file);
             OutputStreamWriter writer = new OutputStreamWriter(fileStream, charset)) {
            writer.write(out);
            writer.flush();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the lines of a file or of a string.
     *
     * @param object a {@code File} to read, a {@code String} to split at line
     *               breaks ("\n" or "\r\n"), or {@code null}
     * @return the lines; empty for {@code null}
     * @throws ClassCastException if {@code object} is neither a {@code File}
     *                            nor a {@code String}
     */
    public static ArrayList<String> readLine(Object object){
        if (object instanceof File) {
            return getFileLines((File) object);
        }
        ArrayList<String> list = new ArrayList<>();
        if (object == null) {
            return list;
        }
        String text = (String) object;
        // Accept Windows line endings too, so string input gives the same
        // lines as reading the same text from a file.
        Collections.addAll(list, text.split("\r?\n"));
        return list;
    }
}

0 comments on commit d935d41

Please sign in to comment.