Commit 42c6293
greg committed Sep 26, 2020
1 parent: 15da25d
Showing 12 changed files with 1,402 additions and 0 deletions.
bootstrap_data/data_shots_1000_costTolerance_0.01_learningRate_1.csv
1,001 changes: 1,001 additions & 0 deletions (large diff not rendered by default)
Empty file.
@@ -0,0 +1,48 @@
from tqdm import tqdm

from ml.bootstrap_model import BootstrapModel
from ml.gradient_descent import get_best_params
from quantum.util import get_random_params
from util import get_bootstrap_file_name, get_bootstrap_row

models_base_file_path = "./models"
bootstrap_data_base_file_path = "./bootstrap_data"


def get_equal_probability_params(shots=1000, cost_tolerance=0.01, learning_rate=1, bootstrap_model_available=True):
    # If a trained model is available, use it directly
    if bootstrap_model_available:
        params = get_random_params()
        bootstrap_model = BootstrapModel(models_base_file_path)
        predicted_params = bootstrap_model.predict_params(params)
        return get_best_params(predicted_params, shots, cost_tolerance, learning_rate)

    # Otherwise, generate the bootstrap training data, train a model, and then use its predictions
    else:
        bootstrap_data_file_path = get_bootstrap_file_name(bootstrap_data_base_file_path, shots, cost_tolerance,
                                                           learning_rate)
        bootstrap_file = open(bootstrap_data_file_path, "w+")
        bootstrap_file.write("start_param_1,start_param_2,final_param_1,final_param_2\n")

        iterations = 1000
        total_steps = 0
        for i in tqdm(range(iterations)):
            # Random parameter initialization
            params = get_random_params()
            start_params_1, start_params_2, final_params_1, final_params_2, steps = get_best_params(params, shots,
                                                                                                    cost_tolerance,
                                                                                                    learning_rate)
            total_steps += steps

            row = get_bootstrap_row(start_params_1, start_params_2, final_params_1, final_params_2)
            bootstrap_file.write(row)
        bootstrap_file.close()
        print("Average steps taken per iteration: " + str(total_steps / iterations))

        bootstrap_model = BootstrapModel()
        bootstrap_model.train_bootstrap_models(bootstrap_data_file_path, models_base_file_path)

        params = get_random_params()
        predicted_params = bootstrap_model.predict_params(params)
        return get_best_params(predicted_params, shots, cost_tolerance, learning_rate)
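For contrast with the runner file below, here is a hedged sketch of the cold-start path (bootstrap_model_available=False), which first generates the bootstrap training data and fits the models before predicting. Expect it to be slow, since it performs 1,000 full gradient-descent runs; the invocation is illustrative, mirroring the defaults used elsewhere in this commit.

from core.main import get_equal_probability_params

# Cold start: generate bootstrap data, train the LightGBM models, then predict
results = get_equal_probability_params(shots=1000, cost_tolerance=0.01,
                                       learning_rate=1, bootstrap_model_available=False)
print(results)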
@@ -0,0 +1,4 @@
from core.main import get_equal_probability_params

results = get_equal_probability_params(shots=1000, cost_tolerance=0.01, learning_rate=1, bootstrap_model_available=True)
print(results)
Empty file.
@@ -0,0 +1,52 @@
import joblib
import lightgbm as lgb
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


class BootstrapModel:
    model_param_1 = None
    model_param_2 = None

    def __init__(self, model_base_path=None):
        if model_base_path:
            self.model_param_1 = joblib.load(model_base_path + "/model_param_1.pkl")
            self.model_param_2 = joblib.load(model_base_path + "/model_param_2.pkl")

    def train_model(self, df, target_column_name):
        X = df.drop(columns=["final_param_1", "final_param_2"])
        y = df[target_column_name]  # 1-D target, as the regressor expects
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
        params = {
            'boosting_type': 'gbdt',
            'objective': 'regression',
            'metric': ['mse'],
            'num_leaves': 1000,
            'learning_rate': 0.05,
            'feature_fraction': 0.9,
            'bagging_fraction': 0.8,
            'bagging_freq': 5,
            'verbose': 0
        }
        gbm = lgb.LGBMRegressor(**params)
        gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], eval_metric='mse', early_stopping_rounds=1000)
        y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)
        print('The mse of prediction is:', mean_squared_error(y_test, y_pred))
        return gbm

    def train_bootstrap_models(self, loc_data_shots_file, loc_model_base_path):
        df = pd.read_csv(loc_data_shots_file)
        self.model_param_1 = self.train_model(df, "final_param_1")
        self.model_param_2 = self.train_model(df, "final_param_2")
        self.save_model(self.model_param_1, loc_model_base_path + "/model_param_1.pkl")
        self.save_model(self.model_param_2, loc_model_base_path + "/model_param_2.pkl")

    def save_model(self, model, path):
        joblib.dump(model, path)

    def predict_params(self, params):
        # Each model predicts one final rotation angle from the random start parameters;
        # take element [0] so callers receive plain scalars rather than length-1 arrays
        bootstrap_param_1 = self.model_param_1.predict([params])[0]
        bootstrap_param_2 = self.model_param_2.predict([params])[0]
        return [bootstrap_param_1, bootstrap_param_2]
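As a quick illustration of the intended lifecycle (a sketch, not part of the commit): train once from a generated bootstrap CSV, persist the two LightGBM regressors, then reload them on later runs. The CSV path below is the one produced by the training loop in the main module; everything else is an assumption.

from ml.bootstrap_model import BootstrapModel
from quantum.util import get_random_params

# One-time training from previously generated bootstrap data (path assumed to exist)
model = BootstrapModel()
model.train_bootstrap_models("./bootstrap_data/data_shots_1000_costTolerance_0.01_learningRate_1.csv", "./models")

# Later runs reload the pickled models and skip training entirely
warm_model = BootstrapModel("./models")
print(warm_model.predict_params(get_random_params()))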
@@ -0,0 +1,188 @@
from math import pi
from typing import List

from quantum.util import get_counts, get_cost_vector


def done_descending(costVector: List[float], shots: int, costTolerance: float) -> bool:
    """
    Determines whether we should stop the algorithm.
    :param costVector: List of values that measure how far the count of each state is from the desired count for that state
    :param shots: Number of shots taken by the circuit
    :param costTolerance: Float value that determines the error margin that will be allowed
    :return: True if all costs are lower than costTolerance * shots, False otherwise
    """
    if not 0 < costTolerance <= 1:
        raise ValueError("costTolerance must be in the range (0, 1]")

    for cost in costVector:
        if cost > costTolerance * shots:
            return False
    return True


def get_gradient(counts: dict) -> List[float]:
    """
    The gradient determines how strongly we modify the values in params[] and in which direction.
    Let's remember what we want each qubit to do:
    1) We want the first qubit to be measured as |0> 50% of the time, with the other 50% of the time being |1>.
    2) We want the second qubit to always be |1> before it reaches the CNOT.

    Remember that the counts give us information AFTER the CNOT.
    We can easily determine how far the first parameter (the one controlling the phase of the first qubit) is from its
    desired goal by seeing how often the first qubit is measured as |0> or |1>. Adding the counts of |00> and |10>
    gives the count of the first qubit being |0>; adding the counts of |01> and |11> gives how often it is |1>.
    With that information, we can determine in which direction and how strongly we should modify params[0]
    with a function such as (count |00> + count |10>) - (count |01> + count |11>). The more often |0> appears than |1>,
    the more positive the output, and the more often |1> appears than |0>, the more negative the output.
    This is what we use as the gradient function for the parameter of the first qubit.

    The second qubit is a little trickier to think about, but the function is just as simple. We want the second
    qubit to always be |1> BEFORE it reaches the CNOT, but we don't have access to measurements before the CNOT. How,
    then, do we tell how to modify the parameter of the RX gate on the second qubit?
    Going back to the other big comment in this file, if there were no RX (or RY) gate on the second qubit, the output
    would always be |00> or |11>, since the CNOT only flips the second qubit when the first one is |1>. So if the
    counts of |00> and |11> are well above 0, the parameter of the gate is not where it should be. The higher the
    total count of |00> and |11>, the stronger the change in phase we need to make, and a function that represents
    that well is simply the total count of |00> and |11> itself.
    There is a small problem with this function: it is only one-directional (it can never be negative). This means it
    may sometimes be optimal to decrease the value of the parameter rather than to increase it, but the function will
    always guide us to increase it. This isn't a huge issue, since the phase is modulo 2*pi. What matters is that it
    tells us to increase the value at the right pace at every given moment: when we are far from the goal, take big
    steps, and as we get closer to the goal, take smaller and smaller steps. The function proposed above does that well.

    One extra thing to note: the gradient of the second qubit depends heavily on the effectiveness of the gradient of
    the first qubit. If the first one isn't working, the second one is flying blind.
    With these two functions dictating how strongly we alter each parameter, and after tweaking some constants such as
    the learning rate used later on, we obtain the correct values for each parameter fairly quickly.
    :param counts: Dictionary containing the count of each state
    :return: List of floats representing how intensely we must change the values of params[]
        - params[0] corresponds to the phase of the RY gate of the first qubit
        - params[1] corresponds to the phase of the RX gate of the second qubit
    """
    # Counts of each state; a missing key means the state was never observed
    a = counts.get('00', 0)
    b = counts.get('01', 0)
    c = counts.get('10', 0)
    d = counts.get('11', 0)

    # We then want the total number of shots to know what proportions we should expect
    totalShots = a + b + c + d

    """
    Let's quickly think about a way of improving the gradient beyond just the difference or sum of counts.
    First, note that without any modifications, the ranges of the gradients are [-0.5, 0.5] and [0, 1] for params[0]
    and params[1], respectively. If our values turn out to be at the extreme undesired end, then we know very well by
    how much we should change the phase.
    For example, if for params[1] we got a gradient of 1, that means params[1] is somewhere around |0>, so it needs
    a shift of pi to be |1>. To get to that point very quickly, we can just multiply the value of the gradient by pi.
    That way, whenever the second parameter is on the complete opposite end of where it should be, we can move it very
    quickly to where it should be. When I only modify the second gradient by multiplying it by pi, this often reduces
    the number of steps taken by the algorithm significantly (for example, from an average of 20 steps to an average
    of 6 steps).
    Now, you would think the same goes for the first parameter: if the gradient is ±0.5, then params[0] is probably
    around 0 or pi, and we want to move it to pi/2 or 3*pi/2, so we need the gradient to be ±pi/2. We could take
    advantage of the 0.5 and multiply it by pi to make the gradient equal to ±pi/2 whenever it is at either extreme.
    But this is not the case. For some reason that I don't fully comprehend, it makes the algorithm take more steps in
    general, especially when combined with the modification of the second gradient value.
    I suspect it has to do with the second gradient value having a greater "tolerable margin of error" than the first:
    params[1] can be anywhere from 3*pi/4 to 5*pi/4 and still output its intended goal often enough, whereas for
    params[0] a range of that width (pi/2) spans the opposite of what is desired. Either way, I'd love to discuss it
    further with my mentor.
    """
    return [((b + d) - (a + c)) / totalShots, pi * (a + d) / totalShots]


def update_params(initial_params: List[float], gradient: List[float], learningRate: float) -> List[float]:
    """
    Here we update the parameters according to the gradient. A very simple update rule is to subtract the gradient,
    modulating its intensity with a learning rate.
    :param initial_params: List of the parameters of the RY and RX gates of the circuit
    :param gradient: List of values which represent how intensely we should modify each parameter
    :param learningRate: Float or int value to modulate the gradient
    """
    updated_params = [None] * len(initial_params)
    for i in range(len(initial_params)):  # For every parameter value
        # Subtract a modulated version of the value of the gradient
        updated_params[i] = initial_params[i] - learningRate * gradient[i]

        # Wrap the phase into [0, 2*pi); Python's % already maps negative values into this range
        updated_params[i] = updated_params[i] % (2 * pi)

    return updated_params


def get_best_params(params, shots=1000, cost_tolerance=0.01, learning_rate=1):
    """
    Main method that runs the entire gradient descent algorithm.
    :param params: Rotation angle parameters
    :param shots: Total number of shots the circuit must execute
    :param cost_tolerance: Float value that determines the error margin that will be allowed
    :param learning_rate: Float or int value to modulate the gradient
    """
    # Initialize variables
    descending = True  # Symbolic, makes it easier for the reader to follow
    counts = None
    cost_vector = None

    print("START PARAMS: " + str(params))  # Print to console the parameters we will begin with

    start_params = params
    steps = 0  # Keep track of how many steps were taken to descend
    while descending:
        # Get the counts that result from these parameters
        counts = get_counts(params, shots)

        # Find the cost of these parameters given the results they have produced
        cost_vector = get_cost_vector(counts)

        # Determine whether the cost is low enough to stop the algorithm
        if done_descending(cost_vector, shots, cost_tolerance):
            break

        # Calculate the gradient
        gradient = get_gradient(counts)

        # Update the params according to the gradient
        params = update_params(params, gradient, learning_rate)
        steps += 1  # Record the number of steps taken

        # Show current situation
        # print("\tCOUNTS: " + str(counts)
        #       + "\n\tCOST VECTOR: " + str(cost_vector)
        #       + "\n\tGRADIENT: " + str(gradient)
        #       + "\n\tUPDATED PARAMS: " + str(params) + "\n")

    # Print the obtained results
    print("FINAL RESULTS:"
          + "\n\tCOUNTS: " + str(counts)
          + "\n\tCOST VECTOR: " + str(cost_vector)
          + "\n\tSTART_PARAMS: " + str(start_params)
          + "\n\tPARAMS: " + str(params)
          + "\nSteps taken: " + str(steps))
    return start_params[0], start_params[1], params[0], params[1], steps
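To make the gradient formula concrete, here is a worked example with made-up counts (not taken from a real run):

from math import pi

# Hypothetical counts for 1000 shots; |00> and |11> dominate, so params[1] is far off
a, b, c, d = 400, 50, 60, 490  # counts of |00>, |01>, |10>, |11>
total = a + b + c + d
grad_0 = ((b + d) - (a + c)) / total  # (540 - 460) / 1000 = 0.08
grad_1 = pi * (a + d) / total         # pi * 890 / 1000 ≈ 2.80
# params[0] only needs a small nudge toward a 50/50 split, while params[1]
# gets an update close to the maximum of pi, as the docstring above predicts.
print([grad_0, grad_1])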
Binary file not shown.
Binary file not shown.
Empty file.
@@ -0,0 +1,92 @@
from math import pi
from random import uniform as randUniform
from typing import List

import qiskit.providers.aer.noise as noise
from qiskit import QuantumCircuit, execute, Aer


def get_random_params():
    # Two random rotation angles in [0, 2*pi)
    params = [randUniform(0, 2 * pi), randUniform(0, 2 * pi)]
    return params


def get_counts(params: List[float], shots: int = 1000) -> dict:
    """
    Here we run the circuit with the given parameters for each gate and return the counts for each state.
    :param params: List of the parameters of the RY and RX gates of the circuit
    :param shots: Total number of shots the circuit must execute
    """
    # Error probabilities
    prob_1 = 0.001  # 1-qubit gate
    prob_2 = 0.01   # 2-qubit gate

    # Depolarizing quantum errors
    error_1 = noise.depolarizing_error(prob_1, 1)
    error_2 = noise.depolarizing_error(prob_2, 2)

    # Add errors to noise model
    noise_model = noise.NoiseModel()
    noise_model.add_all_qubit_quantum_error(error_1, ['u1', 'u2'])
    noise_model.add_all_qubit_quantum_error(error_2, ['cx'])

    # Get basis gates from noise model
    basis_gates = noise_model.basis_gates

    # Make a circuit
    circ = QuantumCircuit(2, 2)

    # Set gates and measurement
    circ.ry(params[0], 0)
    circ.rx(params[1], 1)
    circ.cx(0, 1)
    circ.measure([0, 1], [0, 1])

    # Perform a noisy simulation and get the counts
    # noinspection PyTypeChecker
    result = execute(circ, Aer.get_backend('qasm_simulator'),
                     basis_gates=basis_gates,
                     noise_model=noise_model, shots=shots).result()
    counts = result.get_counts(0)

    return counts


def get_cost_vector(counts: dict) -> List[float]:
    """
    Gives values that represent how far away we are from our desired goal: as close to 0 counts as possible for the
    states |00> and |11>, and as close to 50% of the total counts as possible for each of |01> and |10>.
    :param counts: Dictionary containing the count of each state
    :return: List of values that measure how far the count of each state is from the desired count for that state:
        - First element corresponds to |00>
        - Second element corresponds to |01>
        - Third element corresponds to |10>
        - Fourth element corresponds to |11>
    """
    # Counts of each state; a missing key means the state was never observed
    a = counts.get('00', 0)
    b = counts.get('01', 0)
    c = counts.get('10', 0)
    d = counts.get('11', 0)

    # We then want the total number of shots to know what proportions we should expect
    totalShots = a + b + c + d

    # We return the absolute value of the difference between each state's observed and desired counts
    # Other ways to measure how far each state count is from the goal exist, but this one is simple and works well
    return [abs(a - 0), abs(b - totalShots / 2), abs(c - totalShots / 2), abs(d - 0)]
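A small illustrative check of how get_cost_vector feeds the stopping rule in done_descending (the numbers are invented):

# 1000 hypothetical shots, close to the goal but not quite there
counts = {'00': 3, '01': 502, '10': 489, '11': 6}
total = 3 + 502 + 489 + 6
cost_vector = [abs(3 - 0), abs(502 - total / 2), abs(489 - total / 2), abs(6 - 0)]
print(cost_vector)  # [3, 2.0, 11.0, 6]
# With shots=1000 and cost_tolerance=0.01, each cost may be at most 10 counts,
# so the |10> cost of 11.0 keeps the descent loop running for another step.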