-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit d935d41
Showing
10 changed files
with
375 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Project exclude paths | ||
/target/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<module type="JAVA_MODULE" version="4" /> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# 简述 | ||
一个从 [BE](https://github.com/ffffffff0x/BerylEnigma) 中独立出来的轮子,可以快速分割每一级的URL链接。 | ||
|
||
# 使用方法 | ||
|
||
```bash | ||
java -jar DomainSplit-1.0.jar -in [dir/text.txt] -out [dir/text.txt] | ||
``` | ||
|
||
# 效果 | ||
|
||
![](./assets/img/test1.png) | ||
![](./assets/img/test2.png) |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
|
||
<groupId>org.ffffffff0x</groupId> | ||
<artifactId>DomainSplit</artifactId> | ||
<version>1.0</version> | ||
|
||
<properties> | ||
<maven.compiler.source>8</maven.compiler.source> | ||
<maven.compiler.target>8</maven.compiler.target> | ||
</properties> | ||
<build> | ||
<plugins> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-compiler-plugin</artifactId> | ||
<version>3.8.1</version> | ||
<configuration> | ||
<source>1.8</source> | ||
<target>1.8</target> | ||
</configuration> | ||
</plugin> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-shade-plugin</artifactId> | ||
<version>1.2.1</version> | ||
<executions> | ||
<execution> | ||
<phase>package</phase> | ||
<goals> | ||
<goal>shade</goal> | ||
</goals> | ||
<configuration> | ||
<transformers> | ||
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> | ||
<mainClass>ffffffff0x.domainSplit.Main.Main</mainClass> | ||
</transformer> | ||
</transformers> | ||
</configuration> | ||
</execution> | ||
</executions> | ||
</plugin> | ||
</plugins> | ||
</build> | ||
</project> |
79 changes: 79 additions & 0 deletions
79
src/main/java/ffffffff0x/domainSplit/Main/CliController.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
package ffffffff0x.domainSplit.Main; | ||
|
||
import ffffffff0x.domainSplit.impl.DomainSplit; | ||
import ffffffff0x.domainSplit.impl.FileUtils; | ||
|
||
import java.io.File; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.Map; | ||
|
||
/** | ||
* @author: RyuZUSUNC | ||
* @create: 2021-06-03 11:26 | ||
**/ | ||
|
||
public class CliController { | ||
File inputfile; | ||
File outputfile; | ||
String allResult; | ||
Map<String,String> multipleResult = new HashMap<>(); | ||
Map<Integer, HashSet<String>> originalResult; | ||
|
||
public void run(String in,String out){ | ||
fileSplit(in,out); | ||
paraPocessing(originalResult,false); | ||
FileUtils.outPutFile(outputfile,allResult,"UTF-8"); | ||
System.out.println("任务完成,输出目录为: " + outputfile.getAbsolutePath()); | ||
} | ||
|
||
public void fileSplit(String in,String output){ | ||
try{ | ||
inputfile = new File(in); | ||
outputfile = new File(output); | ||
originalResult = DomainSplit.domainSplit(inputfile); | ||
}catch (Exception e){ | ||
System.out.println("输入有误,请检查文件路径"); | ||
} | ||
} | ||
|
||
public Boolean isReady(String in,String out){ | ||
if (in.equals("-in") && out.equals("-out")){ | ||
return true; | ||
}else { | ||
waring(); | ||
return false; | ||
} | ||
} | ||
|
||
public void waring(){ | ||
System.out.println("语法参考: java -jar DomainSplit.jar -in [dir/input.txt] -out [dir/output.txt]"); | ||
} | ||
|
||
private void paraPocessing(Map<Integer, HashSet<String>> result, boolean multipleFile){ | ||
int k = 0; | ||
|
||
if(multipleFile){ | ||
for (int i = k; i < result.size()-1; i++) { | ||
StringBuilder sb =new StringBuilder(); | ||
for (String a:result.get(i)) { | ||
sb.append(a).append("\n"); | ||
} | ||
sb.append("\n"); | ||
if (i==-1){ | ||
multipleResult.put("fileURLs.txt",sb.toString()); | ||
}else { | ||
multipleResult.put("level-" + i +".txt",sb.toString()); | ||
} | ||
} | ||
}else { | ||
StringBuilder sb =new StringBuilder(); | ||
for (int i = k; i < result.size()-1; i++) { | ||
for (String a:result.get(i)) { | ||
sb.append(a).append("\n"); | ||
} | ||
} | ||
allResult = sb.toString(); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
package ffffffff0x.domainSplit.Main; | ||
|
||
import sun.awt.windows.WPrinterJob; | ||
|
||
/** | ||
* @author: RyuZUSUNC | ||
* @create: 2021-06-03 10:34 | ||
**/ | ||
|
||
public class Main { | ||
public static void main(String[] args) { | ||
CliController cliController = new CliController(); | ||
try { | ||
if (cliController.isReady(args[0],args[2])){ | ||
cliController.run(args[1],args[3]); | ||
} | ||
}catch (Exception e){ | ||
cliController.waring(); | ||
} | ||
|
||
} | ||
} |
119 changes: 119 additions & 0 deletions
119
src/main/java/ffffffff0x/domainSplit/impl/DomainSplit.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
package ffffffff0x.domainSplit.impl; | ||
|
||
import java.util.ArrayList; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.Map; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* @author: RyuZUSUNC | ||
* @create: 2021-05-09 15:03 | ||
**/ | ||
|
||
public class DomainSplit { | ||
public static Map<Integer, HashSet<String>> domainSplit(Object object){ | ||
ArrayList<String[]> allURL = new ArrayList<>(); | ||
HashSet<String> notDirURL = new HashSet<>(); | ||
|
||
for (String domain:FileUtils.readLine(object)) { | ||
if(regexStringNum(domain,"/")==domain.split("/").length){ | ||
allURL.add(split(domain)); | ||
}else{ | ||
notDirURL.add(domain); | ||
allURL.add(split(domain.substring(0,domain.lastIndexOf("/")))); | ||
} | ||
} | ||
|
||
Map<Integer, HashSet<String>> result = sortingDomain(allURL); | ||
result.put(-1,notDirURL); | ||
return result; | ||
} | ||
|
||
/** | ||
* 用于分割单个域名中的所有目录 | ||
* @param domain | ||
* @return | ||
*/ | ||
private static String[] split(String domain){ | ||
//协议头 | ||
String protocol = ""; | ||
//用来做返回值的字符串 | ||
StringBuilder stringBuilder = new StringBuilder(); | ||
|
||
//判断目标是否含有协议头 | ||
if(domain.contains("://")){ | ||
protocol = domain.split("://")[0] + "://"; | ||
domain = domain.split("://")[1]; | ||
} | ||
|
||
// System.out.println(domain); | ||
// System.out.println(RegexStringNum(domain,"/")); | ||
|
||
//用来缓存每次拼接的结果 | ||
String zero = ""; | ||
|
||
//每次拼接下一级目录并保存至StringBuilder | ||
for (String split:domain.split("/")) { | ||
zero = zero + split + "/"; | ||
stringBuilder.append(protocol).append(zero).append("\n"); | ||
} | ||
|
||
//返回值判断URL末尾是目录还是文件 | ||
return stringBuilder.toString().split("\n"); | ||
} | ||
|
||
/** | ||
* 用来判断URL中出现"/"的次数 | ||
* @param targetStr | ||
* @param patternStr | ||
* @return | ||
*/ | ||
private static int regexStringNum(String targetStr, String patternStr) { | ||
// 定义一个样式模板,此中使用正则表达式,括号中是要抓的内容 | ||
// 相当于埋好了陷阱匹配的地方就会掉下去 | ||
Pattern pattern = Pattern.compile(patternStr); | ||
// 定义一个matcher用来做匹配 | ||
Matcher matcher = pattern.matcher(targetStr); | ||
//找到的次数 | ||
int count = 0; | ||
// 如果找到了 | ||
while (matcher.find()) { | ||
count++; | ||
} | ||
return count; | ||
} | ||
|
||
/** | ||
* 用来对每一个URL分割的目标分类并去重 | ||
* @param arrayList | ||
* @return | ||
*/ | ||
private static Map<Integer, HashSet<String>> sortingDomain(ArrayList<String[]> arrayList){ | ||
String[] temp; | ||
//倒序排序,用来确定最大下标 | ||
for (int i = 0; i < arrayList.size()-1; i++) { | ||
for(int j=0;j<arrayList.size()-i-1;j++){ | ||
if(arrayList.get(j+1).length > arrayList.get(j).length){ | ||
temp = arrayList.get(j); | ||
arrayList.set(j,arrayList.get(j+1)); | ||
arrayList.set(j+1,temp); | ||
} | ||
} | ||
} | ||
|
||
Map<Integer,HashSet<String>> result = new HashMap<>(); | ||
//按级别分类重组,使用Hashset去重 | ||
for (int i = 0; i < arrayList.get(0).length; i++) { | ||
HashSet<String> hashSet = new HashSet(); | ||
for (String[] list:arrayList) { | ||
if(list.length > i){ | ||
hashSet.add(list[i]); | ||
} | ||
} | ||
result.put(i,hashSet); | ||
} | ||
return result; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
package ffffffff0x.domainSplit.impl; | ||
|
||
import java.awt.*; | ||
import java.io.*; | ||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.Map; | ||
|
||
/**
 * Small file helpers: read a file as bytes or lines, write text to a file,
 * and turn a {@code File} or {@code String} into a list of lines.
 */
public class FileUtils {
    /**
     * Reads the whole file into a byte array.
     *
     * @param file the file to read
     * @return the file content, or {@code null} if the file could not be read
     *         (the stack trace is printed)
     */
    public static byte[] getFilebyte(File file){
        try (FileInputStream in = new FileInputStream(file);
             ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
            // Read until EOF: available() is only an estimate and a single
            // read() call is not guaranteed to fill the array.
            byte[] chunk = new byte[8192];
            int read;
            while ((read = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toByteArray();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the file line by line.
     *
     * <p>NOTE(review): the reader uses the platform default charset, whereas
     * callers in this project write with UTF-8. Kept as is for backward
     * compatibility — confirm whether UTF-8 should be used here as well.
     *
     * @param file the file to read
     * @return the lines read; empty if the file could not be opened, and
     *         partial if reading failed midway
     */
    public static ArrayList<String> getFileLines(File file){
        ArrayList<String> result = new ArrayList<>();
        try (FileInputStream inputStream = new FileInputStream(file);
             BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                result.add(line);
            }
        } catch (IOException ignored) {
            // Best effort by design: callers receive whatever was read so far.
        }
        return result;
    }

    /**
     * Writes text to a file, creating missing parent directories first.
     * Failures are reported by printing the stack trace; a {@code null} file
     * is ignored.
     *
     * @param file    the target file
     * @param out     the text to write
     * @param charset name of the charset used to encode the text
     */
    public static void outPutFile(File file,String out,String charset){
        if (file == null) {
            return;
        }
        // A bare relative name such as "out.txt" has no parent of its own;
        // resolve it first so getParentFile() does not return null.
        File parent = file.getAbsoluteFile().getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }
        try (FileOutputStream stream = new FileOutputStream(file);
             OutputStreamWriter writer = new OutputStreamWriter(stream, charset)) {
            writer.write(out);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the lines of a file or of a string.
     *
     * @param object a {@code File} (read through {@link #getFileLines(File)})
     *               or a {@code String} (split on {@code "\n"})
     * @return the lines as a new list
     */
    public static ArrayList<String> readLine(Object object){
        if (object instanceof File) {
            return FileUtils.getFileLines((File) object);
        }
        String text = (String) object;
        ArrayList<String> list = new ArrayList<>();
        Collections.addAll(list, text.split("\n"));
        return list;
    }
}